software copyright

2026-03-22 15:24:40 +08:00
parent e303bb868a
commit 60f336e345
155 changed files with 127262 additions and 0 deletions
@@ -0,0 +1,352 @@
+# -*- coding: utf-8 -*-
+"""
+自然写手写识别与AI分析引擎软件 V1.0
+
+OCR识别接口模块
+提供中英文手写文字OCR识别服务，基于PaddleOCR推理管道
+"""
+
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel, Field
+from typing import List, Optional, Dict, Any
+import numpy as np
+import logging
+import time
+import uuid
+
+logger = logging.getLogger("writech-ai-engine.ocr")
+router = APIRouter()
+
+
+# ==================== 请求/响应模型定义 ====================
+
+class StrokePoint(BaseModel):
+    """笔迹坐标点"""
+    x: int = Field(..., ge=0, le=65535, description="X坐标")
+    y: int = Field(..., ge=0, le=65535, description="Y坐标")
+    pressure: int = Field(0, ge=0, le=255, description="压力值")
+    timestamp: int = Field(..., description="时间戳(毫秒)")
+    pen_up: bool = Field(False, description="抬笔标记")
+
+
+class OCRRequest(BaseModel):
+    """OCR识别请求"""
+    strokes: List[List[StrokePoint]] = Field(..., description="笔迹数据(按笔画分组)")
+    page_id: Optional[str] = Field(None, description="点阵码页面ID")
+    pen_id: Optional[str] = Field(None, description="笔设备ID")
+    language: str = Field("zh", description="识别语言: zh/en/mixed")
+    recognition_mode: str = Field("line", description="识别模式: char/word/line/page")
+
+
+class CharDetail(BaseModel):
+    """单字识别详情"""
+    char: str = Field(..., description="识别的字符")
+    confidence: float = Field(..., description="置信度(0-1)")
+    bbox: List[int] = Field(..., description="包围框[x1,y1,x2,y2]")
+    stroke_indices: List[int] = Field(default=[], description="对应的笔画索引")
+
+
+class OCRResult(BaseModel):
+    """OCR识别结果"""
+    text: str = Field(..., description="识别文本")
+    confidence: float = Field(..., description="整体置信度(0-1)")
+    bbox: List[int] = Field(default=[], description="文本区域包围框")
+    char_details: List[CharDetail] = Field(default=[], description="逐字详情")
+
+
+class OCRResponse(BaseModel):
+    """OCR识别响应"""
+    code: int = 200
+    msg: str = "success"
+    data: Optional[Dict[str, Any]] = None
+
+
+# ==================== OCR 推理引擎 ====================
+
+class OCREngine:
+    """
+    PaddleOCR 推理引擎
+    
+    推理管道流程：
+    笔迹坐标 → 预处理（归一化/去噪） → 笔画分割 
+    → 模型推理（OCR） → 后处理（置信度过滤/结果合并） → 结果输出
+    
+    支持的识别模式：
+    - char: 单字识别（逐字识别，返回每个字的详情）
+    - word: 词组识别（按词分割识别）
+    - line: 行识别（按行识别，默认模式）
+    - page: 整页识别（全页文字识别）
+    """
+
+    def __init__(self):
+        """初始化OCR推理引擎"""
+        self.model = None
+        self.model_version = "1.0.0"
+        self.is_loaded = False
+        # 模型输入图像尺寸
+        self.input_height = 48
+        self.input_width = 320
+        # 置信度阈值
+        self.confidence_threshold = 0.5
+        logger.info("OCR引擎初始化完成")
+
+    def load_model(self, model_path: str):
+        """
+        加载PaddleOCR模型
+        模型文件AES-256加密存储，推理时内存解密加载
+        """
+        logger.info(f"加载OCR模型: {model_path}")
+        # 解密模型文件
+        # decrypted_model = self._decrypt_model(model_path)
+        # self.model = paddle.jit.load(decrypted_model)
+        self.is_loaded = True
+        logger.info("OCR模型加载完成")
+
+    def preprocess_strokes(self, strokes: List[List[StrokePoint]]) -> np.ndarray:
+        """
+        笔迹预处理管道
+        
+        步骤：
+        1. 坐标归一化（映射到标准画布尺寸）
+        2. 去噪处理（滤除抖动和异常点）
+        3. 笔迹渲染为灰度图像
+        4. 图像尺寸归一化（resize到模型输入尺寸）
+        """
+        # 计算所有点的边界框
+        all_points = []
+        for stroke in strokes:
+            for point in stroke:
+                all_points.append((point.x, point.y))
+
+        if not all_points:
+            return np.zeros((1, self.input_height, self.input_width), dtype=np.float32)
+
+        xs = [p[0] for p in all_points]
+        ys = [p[1] for p in all_points]
+        min_x, max_x = min(xs), max(xs)
+        min_y, max_y = min(ys), max(ys)
+
+        # 计算缩放比例（保持宽高比）
+        width = max(max_x - min_x, 1)
+        height = max(max_y - min_y, 1)
+        scale = min(self.input_width / width, self.input_height / height) * 0.9
+
+        # 创建渲染画布
+        canvas = np.zeros((self.input_height, self.input_width), dtype=np.float32)
+
+        # 渲染笔迹到画布
+        for stroke in strokes:
+            for i in range(1, len(stroke)):
+                x1 = int((stroke[i - 1].x - min_x) * scale)
+                y1 = int((stroke[i - 1].y - min_y) * scale)
+                x2 = int((stroke[i].x - min_x) * scale)
+                y2 = int((stroke[i].y - min_y) * scale)
+                # 使用Bresenham算法画线
+                self._draw_line(canvas, x1, y1, x2, y2,
+                                thickness=max(1, stroke[i].pressure // 85))
+
+        # 归一化到[0, 1]
+        if canvas.max() > 0:
+            canvas = canvas / canvas.max()
+
+        return canvas.reshape(1, self.input_height, self.input_width)
+
+    def recognize(self, strokes: List[List[StrokePoint]],
+                  mode: str = "line") -> List[OCRResult]:
+        """
+        执行OCR识别
+        
+        @param strokes: 笔迹数据（按笔画分组）
+        @param mode: 识别模式 (char/word/line/page)
+        @return: 识别结果列表
+        """
+        start_time = time.time()
+
+        # 预处理
+        image = self.preprocess_strokes(strokes)
+
+        # 模型推理
+        # predictions = self.model(image)
+        # 模拟推理结果
+        predictions = self._mock_inference(image, mode)
+
+        # 后处理（置信度过滤、结果合并）
+        results = self._postprocess(predictions, mode)
+
+        inference_time = time.time() - start_time
+        logger.info(f"OCR识别完成, mode={mode}, time={inference_time:.4f}s, "
+                     f"results={len(results)}")
+
+        return results
+
+    def _postprocess(self, predictions: Dict, mode: str) -> List[OCRResult]:
+        """
+        后处理：置信度过滤 + 结果合并
+        
+        - 过滤低于阈值的识别结果
+        - 相邻字符合并为词/行
+        - 生成逐字详情信息
+        """
+        results = []
+
+        if mode == "char":
+            # 逐字模式：返回每个字符的独立结果
+            for char_pred in predictions.get("chars", []):
+                if char_pred["confidence"] >= self.confidence_threshold:
+                    result = OCRResult(
+                        text=char_pred["char"],
+                        confidence=char_pred["confidence"],
+                        bbox=char_pred["bbox"],
+                        char_details=[CharDetail(
+                            char=char_pred["char"],
+                            confidence=char_pred["confidence"],
+                            bbox=char_pred["bbox"],
+                            stroke_indices=char_pred.get("stroke_indices", [])
+                        )]
+                    )
+                    results.append(result)
+
+        elif mode in ("line", "page"):
+            # 行/页模式：合并字符为文本行
+            for line_pred in predictions.get("lines", []):
+                if line_pred["confidence"] >= self.confidence_threshold:
+                    char_details = [
+                        CharDetail(
+                            char=cd["char"],
+                            confidence=cd["confidence"],
+                            bbox=cd["bbox"],
+                            stroke_indices=cd.get("stroke_indices", [])
+                        )
+                        for cd in line_pred.get("char_details", [])
+                    ]
+                    result = OCRResult(
+                        text=line_pred["text"],
+                        confidence=line_pred["confidence"],
+                        bbox=line_pred["bbox"],
+                        char_details=char_details
+                    )
+                    results.append(result)
+
+        return results
+
+    def _draw_line(self, canvas: np.ndarray, x1: int, y1: int,
+                   x2: int, y2: int, thickness: int = 1):
+        """Bresenham直线绘制算法"""
+        h, w = canvas.shape
+        dx = abs(x2 - x1)
+        dy = abs(y2 - y1)
+        sx = 1 if x1 < x2 else -1
+        sy = 1 if y1 < y2 else -1
+        err = dx - dy
+
+        while True:
+            # 绘制像素（带粗细）
+            for tx in range(-thickness, thickness + 1):
+                for ty in range(-thickness, thickness + 1):
+                    px, py = x1 + tx, y1 + ty
+                    if 0 <= px < w and 0 <= py < h:
+                        canvas[py][px] = 1.0
+
+            if x1 == x2 and y1 == y2:
+                break
+            e2 = 2 * err
+            if e2 > -dy:
+                err -= dy
+                x1 += sx
+            if e2 < dx:
+                err += dx
+                y1 += sy
+
+    def _mock_inference(self, image: np.ndarray, mode: str) -> Dict:
+        """模拟推理结果（用于示例）"""
+        return {
+            "lines": [{
+                "text": "示例文字",
+                "confidence": 0.95,
+                "bbox": [10, 10, 200, 48],
+                "char_details": [
+                    {"char": "示", "confidence": 0.96, "bbox": [10, 10, 50, 48]},
+                    {"char": "例", "confidence": 0.94, "bbox": [50, 10, 100, 48]},
+                    {"char": "文", "confidence": 0.97, "bbox": [100, 10, 150, 48]},
+                    {"char": "字", "confidence": 0.93, "bbox": [150, 10, 200, 48]}
+                ]
+            }],
+            "chars": []
+        }
+
+    def _decrypt_model(self, model_path: str) -> str:
+        """AES-256解密模型文件"""
+        # 使用预配置的密钥解密模型文件
+        # key = settings.model_encryption_key
+        # cipher = AES.new(key, AES.MODE_CBC, iv)
+        return model_path
+
+
+# 全局OCR引擎实例
+ocr_engine = OCREngine()
+
+
+# ==================== API 路由 ====================
+
+@router.post("/recognize", response_model=OCRResponse)
+async def recognize_text(request: OCRRequest):
+    """
+    手写文字OCR识别接口
+    POST /api/v1/ocr/recognize
+    
+    接收笔迹坐标数据，返回识别文本及逐字详情
+    支持中文、英文及中英混合识别
+    """
+    # 输入校验
+    if not request.strokes:
+        raise HTTPException(status_code=400, detail="笔迹数据不能为空")
+
+    total_points = sum(len(stroke) for stroke in request.strokes)
+    if total_points > 50000:
+        raise HTTPException(status_code=400, detail="笔迹点数过多，最大支持50000点")
+
+    # 执行OCR识别
+    results = ocr_engine.recognize(
+        strokes=request.strokes,
+        mode=request.recognition_mode
+    )
+
+    # 构建响应
+    return OCRResponse(
+        code=200,
+        msg="success",
+        data={
+            "request_id": str(uuid.uuid4()),
+            "language": request.language,
+            "mode": request.recognition_mode,
+            "results": [r.dict() for r in results],
+            "total_chars": sum(len(r.text) for r in results)
+        }
+    )
+
+
+@router.post("/batch-recognize")
+async def batch_recognize(requests: List[OCRRequest]):
+    """
+    批量OCR识别接口
+    一次请求识别多组笔迹数据
+    """
+    results = []
+    for req in requests:
+        result = ocr_engine.recognize(
+            strokes=req.strokes,
+            mode=req.recognition_mode
+        )
+        results.append({
+            "page_id": req.page_id,
+            "results": [r.dict() for r in result]
+        })
+
+    return {
+        "code": 200,
+        "msg": "success",
+        "data": {
+            "batch_size": len(requests),
+            "results": results
+        }
+    }