software copyright
This commit is contained in:
@@ -0,0 +1,446 @@
|
||||
# 自然写手写识别与AI分析引擎软件 V1.0
|
||||
# 作文批改接口模块 - AI作文评分与批改建议服务
|
||||
|
||||
"""
|
||||
作文批改API接口
|
||||
提供AI作文评分、多维度分析(结构/语法/内容/修辞)、批改建议生成等功能
|
||||
支持小学至初中阶段作文批改,基于大语言模型与NLP分析管道
|
||||
"""
|
||||
|
||||
import time
|
||||
import json
|
||||
import logging
|
||||
import hashlib
|
||||
import re
|
||||
from typing import List, Dict, Optional, Tuple
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from fastapi import APIRouter, HTTPException, Depends
|
||||
from pydantic import BaseModel, Field, validator
|
||||
|
||||
# Module-level logger, named after this module through __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
# ==================== 数据模型定义 ====================
|
||||
|
||||
class EssayReviewRequest(BaseModel):
    """Request payload for an essay review.

    Carries the OCR-recognised essay text together with the grading context
    (grade level, genre, full-mark value) and optional student / assignment
    identifiers.
    """

    # Essay body; length is limited to 10-5000 characters.
    text: str = Field(..., min_length=10, max_length=5000, description="作文OCR识别文本")
    # Optional essay title.
    title: Optional[str] = Field(None, description="作文题目")
    # School grade, 1 through 9.
    grade: int = Field(3, ge=1, le=9, description="年级(1-9)")
    # Genre name; checked against the allowed values by validate_genre().
    genre: str = Field("narrative", description="文体类型: narrative/argumentative/expository/descriptive")
    # Full-mark value. Must be positive: scores are scaled to this value and
    # the score / max_score ratio selects the overall comment, so zero would
    # be a division by zero and a negative value would be meaningless.
    max_score: int = Field(100, gt=0, description="满分值")
    # Optional identifiers.
    student_id: Optional[str] = Field(None, description="学生ID")
    assignment_id: Optional[str] = Field(None, description="作业ID")
    # When False, the per-sentence grammar errors are omitted from the result.
    enable_suggestions: bool = Field(True, description="是否生成修改建议")

    @validator('genre')
    def validate_genre(cls, v):
        """Reject any genre that is not one of the four supported names.

        Raises:
            ValueError: if ``v`` is not a supported genre.
        """
        valid_genres = ['narrative', 'argumentative', 'expository', 'descriptive']
        if v not in valid_genres:
            raise ValueError(f'文体类型必须为: {valid_genres}')
        return v
|
||||
|
||||
|
||||
class SentenceError(BaseModel):
    """A single problem found in the essay, attached to a text fragment."""

    # The offending fragment of the essay.
    sentence: str = Field(..., description="原始句子")

    # Category label of the problem.
    error_type: str = Field(..., description="错误类型")

    # Human-readable advice on how to fix it.
    suggestion: str = Field(..., description="修改建议")

    # Index of the fragment within the original text.
    position: int = Field(..., description="句子在原文中的位置索引")
|
||||
|
||||
|
||||
class EssayScoreDetail(BaseModel):
    """Per-dimension scores of an essay."""

    # Structure score.
    structure: float = Field(..., description="结构分")

    # Grammar score.
    grammar: float = Field(..., description="语法分")

    # Content score.
    content: float = Field(..., description="内容分")

    # Rhetoric score.
    rhetoric: float = Field(..., description="修辞分")

    # Handwriting score; None when not available.
    handwriting: Optional[float] = Field(None, description="书写分(如有)")
|
||||
|
||||
|
||||
# ==================== 文本分析工具 ====================
|
||||
|
||||
class TextAnalyzer:
    """Stateless helpers for basic analysis of Chinese essay text.

    Offers sentence splitting, paragraph splitting, character counting and a
    keyword/regex based detector for simile, parallelism and personification.
    """

    # Characters that end a sentence. split_sentences() tests one character at
    # a time, so the two-character '……' entry can never match on its own; it
    # is kept only for backward compatibility, and the single '…' is what lets
    # an ellipsis close a sentence. Half-width and full-width forms are both
    # listed because essay text may contain either.
    SENTENCE_ENDINGS = {'。', '!', '?', '……', ';', '…', '!', '?', ';'}

    # Paragraph indent marker; not referenced by any method of this class.
    PARAGRAPH_INDENT = ' '

    # Characters counted as punctuation by count_characters(): half-width and
    # full-width forms plus straight and curly quotation marks.
    _PUNCTUATION = frozenset(
        ',。!?、;:"()《》…—'
        ',!?;:()“”‘’\''
    )

    # Simile cue phrases.
    _SIMILE_PATTERNS = tuple(re.compile(p) for p in (
        r'像.{2,10}一样', r'如同.{2,10}', r'仿佛.{2,10}',
        r'好像.{2,10}', r'犹如.{2,10}', r'宛如.{2,10}',
    ))

    # Personification cues: a nature character followed within three
    # characters by one character from the second class. Both bracket
    # expressions are character classes, so they match single characters,
    # not whole words.
    _PERSONIFICATION_PATTERNS = tuple(re.compile(p) for p in (
        r'[风雨雪花树草月阳光河水山].{0,3}[笑哭唱跳跑走说叫]',
        r'[风雨雪花树草月阳光河水山].{0,3}[温柔轻轻悄悄]',
    ))

    @staticmethod
    def split_sentences(text: str) -> List[str]:
        """Split ``text`` into sentences.

        A sentence ends at a run of one or more characters from
        SENTENCE_ENDINGS; the whole run (for example '……' or '!!') stays
        attached to the sentence it closes. Each sentence is stripped of
        surrounding whitespace and empty pieces are dropped. Trailing text
        without a terminator is returned as a final sentence.
        """
        endings = TextAnalyzer.SENTENCE_ENDINGS
        sentences: List[str] = []
        buffer: List[str] = []
        closed = False  # True while the buffer ends with a terminator run

        def flush() -> None:
            piece = "".join(buffer).strip()
            if piece:
                sentences.append(piece)
            buffer.clear()

        for ch in text:
            is_ending = ch in endings
            if closed and not is_ending:
                # The terminator run is over: the buffered sentence is complete.
                flush()
                closed = False
            buffer.append(ch)
            if is_ending:
                closed = True
        flush()
        return sentences

    @staticmethod
    def _locate_sentences(text: str, sentences: List[str]) -> List[int]:
        """Return the start offset in ``text`` of each sentence, in order.

        The search for each sentence resumes after the previous one, so a
        sentence that occurs several times is mapped to its own occurrence
        instead of always to the first.
        """
        offsets: List[int] = []
        cursor = 0
        for sentence in sentences:
            start = text.find(sentence, cursor)
            offsets.append(start)
            if start >= 0:
                cursor = start + len(sentence)
        return offsets

    @staticmethod
    def split_paragraphs(text: str) -> List[str]:
        """Split ``text`` on newlines and return the non-blank, stripped lines."""
        return [line.strip() for line in text.split('\n') if line.strip()]

    @staticmethod
    def count_characters(text: str) -> Dict[str, int]:
        """Count characters in ``text``.

        Returns:
            A dict with three keys: ``"total"`` (all non-whitespace
            characters), ``"chinese"`` (characters in U+4E00..U+9FFF) and
            ``"punctuation"`` (characters listed in ``_PUNCTUATION``).
        """
        punctuation = TextAnalyzer._PUNCTUATION
        return {
            # Every kind of whitespace is excluded (spaces, tabs, CR/LF,
            # full-width space), not only ' ' and '\n'.
            "total": sum(1 for c in text if not c.isspace()),
            "chinese": sum(1 for c in text if '\u4e00' <= c <= '\u9fff'),
            "punctuation": sum(1 for c in text if c in punctuation),
        }

    @staticmethod
    def detect_rhetoric(text: str) -> List[Dict]:
        """Detect rhetorical devices in ``text`` with simple heuristics.

        Returns:
            A list of dicts with keys ``"type"``, ``"name"``, ``"text"`` and
            ``"position"`` (start offset in ``text``). Similes come first,
            then parallelism, then personification.
        """
        rhetorics: List[Dict] = []

        # Simile: cue phrases such as "像...一样".
        for pattern in TextAnalyzer._SIMILE_PATTERNS:
            for m in pattern.finditer(text):
                rhetorics.append({
                    "type": "simile", "name": "比喻",
                    "text": m.group(), "position": m.start(),
                })

        # Parallelism (simplified): three consecutive sentences of similar
        # length (each neighbouring pair differs by fewer than 5 characters),
        # the first longer than 5 characters, all starting with the same
        # character.
        sentences = TextAnalyzer.split_sentences(text)
        starts = TextAnalyzer._locate_sentences(text, sentences)
        triples = zip(sentences, sentences[1:], sentences[2:], starts)
        for s1, s2, s3, start in triples:
            similar_length = abs(len(s1) - len(s2)) < 5 and abs(len(s2) - len(s3)) < 5
            if similar_length and len(s1) > 5 and s1[0] == s2[0] == s3[0]:
                rhetorics.append({
                    "type": "parallelism", "name": "排比",
                    "text": f"{s1}{s2}{s3}", "position": start,
                })

        # Personification: a nature character near a human-action character.
        for pattern in TextAnalyzer._PERSONIFICATION_PATTERNS:
            for m in pattern.finditer(text):
                rhetorics.append({
                    "type": "personification", "name": "拟人",
                    "text": m.group(), "position": m.start(),
                })

        return rhetorics
|
||||
|
||||
|
||||
# ==================== 作文评分引擎 ====================
|
||||
|
||||
class EssayScoringEngine:
    """Rule-based essay scoring engine.

    Scores an essay on four dimensions, each on a 0-100 scale, and combines
    them with DIMENSION_WEIGHTS: structure 25%, grammar 25%, content 30%,
    rhetoric 20%. The weighted result is then scaled to the requested
    full-mark value.
    """

    # Expected number of Chinese characters per grade: (minimum, maximum).
    EXPECTED_LENGTH = {
        1: (50, 150), 2: (100, 250), 3: (200, 400),
        4: (300, 500), 5: (350, 600), 6: (400, 700),
        7: (500, 800), 8: (600, 900), 9: (600, 1000),
    }

    # Weight of each dimension in the total score; the weights sum to 1.
    DIMENSION_WEIGHTS = {
        "structure": 0.25,
        "grammar": 0.25,
        "content": 0.30,
        "rhetoric": 0.20,
    }

    def __init__(self):
        """Create the text analyzer and load the grammar error patterns."""
        self._text_analyzer = TextAnalyzer()
        self._error_patterns = self._load_error_patterns()
        logger.info("作文评分引擎初始化完成")

    def _load_error_patterns(self) -> List[Dict]:
        """Return the built-in grammar error patterns.

        Each entry is a dict with a regex ``"pattern"``, an error ``"type"``
        label and a user-facing ``"msg"``.
        """
        return [
            {"pattern": r"的的", "type": "repetition", "msg": "重复用字'的的'"},
            {"pattern": r"了了", "type": "repetition", "msg": "重复用字'了了'"},
            {"pattern": r"因为.{5,50}因为", "type": "logic", "msg": "重复使用'因为',建议精简"},
            {"pattern": r"然后.{3,20}然后.{3,20}然后", "type": "style", "msg": "过度使用'然后'连接"},
            {"pattern": r"非常非常", "type": "repetition", "msg": "重复使用'非常'"},
            # Both the half-width and the full-width comma are matched.
            {"pattern": r"[,,]{3,}", "type": "punctuation", "msg": "连续使用多个逗号,建议使用句号断句"},
        ]

    def score_structure(self, text: str, grade: int) -> Tuple[float, List[str]]:
        """Score the essay structure on a 0-100 scale.

        Deductions are made for too few paragraphs, for a Chinese character
        count outside the range expected for ``grade`` (grades missing from
        EXPECTED_LENGTH fall back to 300-600), and for a very short first or
        last paragraph.

        Returns:
            ``(score, comments)`` where ``comments`` explains each deduction.
        """
        comments: List[str] = []
        score = 100.0

        paragraphs = self._text_analyzer.split_paragraphs(text)
        chinese_count = self._text_analyzer.count_characters(text)["chinese"]

        # Paragraph count.
        if len(paragraphs) < 2:
            score -= 25
            comments.append("文章缺少段落划分,建议分段书写使结构更清晰")
        elif len(paragraphs) < 3:
            score -= 10
            comments.append("段落较少,建议增加过渡段落")

        # Length: one point per 10 missing characters, capped at 30; a flat
        # 5 points when the text exceeds 1.5 times the expected maximum.
        low, high = self.EXPECTED_LENGTH.get(grade, (300, 600))
        if chinese_count < low:
            score -= min(30, (low - chinese_count) // 10)
            comments.append(f"字数偏少({chinese_count}字),该年级建议{low}-{high}字")
        elif chinese_count > high * 1.5:
            score -= 5
            comments.append("字数偏多,建议精简语句突出重点")

        # Opening and closing paragraphs.
        if paragraphs:
            if len(paragraphs[0]) < 15:
                score -= 10
                comments.append("开头过于简短,建议丰富开篇引入")
            if len(paragraphs[-1]) < 10:
                score -= 10
                comments.append("结尾过于简短,建议加强收束呼应主题")

        return max(0.0, score), comments

    def score_grammar(self, text: str) -> Tuple[float, List[SentenceError]]:
        """Score grammar on a 0-100 scale.

        Every match of a built-in error pattern costs 5 points and every
        sentence longer than 80 characters costs 3 points.

        Returns:
            ``(score, errors)`` where each error records the fragment, its
            type, a suggestion and its start offset in ``text``.
        """
        errors: List[SentenceError] = []
        score = 100.0

        # Pattern-based checks.
        for entry in self._error_patterns:
            for m in re.finditer(entry["pattern"], text):
                errors.append(SentenceError(
                    sentence=m.group(),
                    error_type=entry["type"],
                    suggestion=entry["msg"],
                    position=m.start(),
                ))
                score -= 5

        # Over-long sentences. Sentences come back in text order, so each one
        # is searched for after the end of the previous one; a plain
        # text.find() would report the first occurrence for every repeat.
        cursor = 0
        for sentence in self._text_analyzer.split_sentences(text):
            start = text.find(sentence, cursor)
            if start < 0:
                # Defensive fallback; not expected for split_sentences output.
                start = text.find(sentence)
            else:
                cursor = start + len(sentence)
            if len(sentence) > 80:
                errors.append(SentenceError(
                    sentence=sentence[:30] + "...",
                    error_type="long_sentence",
                    suggestion="句子过长,建议拆分为多个短句以提高可读性",
                    position=start,
                ))
                score -= 3

        return max(0.0, score), errors

    def score_content(self, text: str, title: Optional[str], genre: str, grade: int) -> Tuple[float, List[str]]:
        """Score content quality on a 0-100 scale.

        Starts from a base of 85 and adjusts for vocabulary richness (ratio
        of distinct Chinese characters to all Chinese characters), use of
        logical connectors and presence of emotion words. ``title``,
        ``genre`` and ``grade`` are accepted but not used by the current
        heuristics.

        Returns:
            ``(score, comments)``.
        """
        comments: List[str] = []
        score = 85.0  # content is hard to quantify, so the base is generous

        char_stats = self._text_analyzer.count_characters(text)
        sentences = self._text_analyzer.split_sentences(text)

        # Vocabulary richness.
        unique_chars = {c for c in text if '\u4e00' <= c <= '\u9fff'}
        vocab_richness = len(unique_chars) / max(char_stats["chinese"], 1)
        if vocab_richness > 0.6:
            score += 10
            comments.append("词汇丰富,用词多样化")
        elif vocab_richness < 0.3:
            score -= 10
            comments.append("词汇较为单一,建议使用更丰富的词语表达")

        # Logical connectors. Longer connectors are checked first and masked
        # out once found, so '但是' is not counted a second time as '但'.
        connectors = ['因此', '所以', '但是', '然而', '首先', '其次', '最后', '总之',
                      '不仅', '而且', '虽然', '但', '因为', '于是']
        remaining = text
        used_connectors = []
        for word in sorted(connectors, key=len, reverse=True):
            if word in remaining:
                used_connectors.append(word)
                # The placeholder keeps neighbouring characters from fusing
                # into a new connector.
                remaining = remaining.replace(word, "\0")
        if len(used_connectors) >= 3:
            score += 5
            comments.append("逻辑衔接词使用恰当,行文连贯")
        elif not used_connectors and len(sentences) > 5:
            score -= 5
            comments.append("缺少逻辑连接词,建议增加过渡衔接使行文更连贯")

        # Emotion words.
        emotion_words = ['开心', '快乐', '高兴', '感动', '难过', '伤心', '惊讶',
                         '温暖', '幸福', '骄傲', '担心', '紧张']
        if any(word in text for word in emotion_words):
            score += 3
            comments.append("有恰当的情感表达,增强了文章感染力")

        return min(100, max(0, score)), comments

    def score_rhetoric(self, text: str, grade: int) -> Tuple[float, List[str]]:
        """Score the use of rhetorical devices on a 0-100 scale.

        Starts from a base of 70; adds 25 for three or more distinct device
        types or 15 for at least one; subtracts 10 when ``grade`` is 5 or
        above and fewer than two types were found.

        Returns:
            ``(score, comments)``.
        """
        comments: List[str] = []
        score = 70.0

        rhetorics = self._text_analyzer.detect_rhetoric(text)
        rhetoric_types = {r["type"] for r in rhetorics}

        if len(rhetoric_types) >= 3:
            score += 25
            comments.append(f"修辞手法运用丰富,使用了{len(rhetoric_types)}种修辞手法")
        elif len(rhetoric_types) >= 1:
            score += 15
            # De-duplicate in first-seen order so the comment text is the
            # same on every run (set iteration order is not stable).
            used_names = list(dict.fromkeys(r["name"] for r in rhetorics))
            comments.append(f"使用了{'、'.join(used_names)}等修辞手法")
        else:
            comments.append("建议适当使用比喻、排比等修辞手法增强表达效果")

        # Higher grades are expected to use more devices.
        if grade >= 5 and len(rhetoric_types) < 2:
            score -= 10
            comments.append("该年级建议至少使用2种以上修辞手法")

        return min(100, max(0, score)), comments

    def review_essay(self, request: EssayReviewRequest) -> Dict:
        """Review an essay and return the total score with per-dimension detail.

        Returns:
            A dict with the scaled total score, the weighted per-dimension
            scores, character counts, an overall comment, the per-dimension
            comment lists, the grammar errors (empty when
            ``request.enable_suggestions`` is False) and the elapsed time in
            milliseconds.
        """
        start_time = time.time()
        weights = self.DIMENSION_WEIGHTS
        max_score = request.max_score

        # Score each dimension independently (0-100 each).
        struct_score, struct_comments = self.score_structure(request.text, request.grade)
        grammar_score, grammar_errors = self.score_grammar(request.text)
        content_score, content_comments = self.score_content(
            request.text, request.title, request.genre, request.grade)
        rhetoric_score, rhetoric_comments = self.score_rhetoric(request.text, request.grade)

        # Weighted total, scaled from 0-100 to 0-max_score.
        weighted_score = (
            struct_score * weights["structure"] +
            grammar_score * weights["grammar"] +
            content_score * weights["content"] +
            rhetoric_score * weights["rhetoric"]
        )
        total_score = round(weighted_score / 100 * max_score, 1)

        char_stats = TextAnalyzer.count_characters(request.text)

        overall_comment = self._generate_overall_comment(
            total_score, max_score, struct_comments,
            content_comments, rhetoric_comments
        )

        elapsed = (time.time() - start_time) * 1000

        return {
            "total_score": total_score,
            "max_score": max_score,
            # Each dimension is reported as its weighted share of max_score.
            "dimensions": {
                "structure": round(struct_score / 100 * max_score * weights["structure"], 1),
                "grammar": round(grammar_score / 100 * max_score * weights["grammar"], 1),
                "content": round(content_score / 100 * max_score * weights["content"], 1),
                "rhetoric": round(rhetoric_score / 100 * max_score * weights["rhetoric"], 1),
            },
            "character_count": char_stats,
            "overall_comment": overall_comment,
            "structure_analysis": struct_comments,
            "content_analysis": content_comments,
            "rhetoric_analysis": rhetoric_comments,
            "grammar_errors": [e.dict() for e in grammar_errors] if request.enable_suggestions else [],
            "inference_time_ms": round(elapsed, 2),
        }

    def _generate_overall_comment(self, score: float, max_score: int,
                                  struct_comments: List, content_comments: List,
                                  rhetoric_comments: List) -> str:
        """Build the overall comment.

        The comment is a level prefix chosen from ``score / max_score``
        followed by the first comment of each non-empty comment list, joined
        with ';'.
        """
        # A non-positive full mark is treated as the lowest level instead of
        # raising ZeroDivisionError.
        ratio = score / max_score if max_score > 0 else 0.0
        if ratio >= 0.9:
            prefix = "优秀!"
        elif ratio >= 0.75:
            prefix = "良好。"
        elif ratio >= 0.6:
            prefix = "中等。"
        else:
            prefix = "需要加强。"

        groups = (struct_comments, content_comments, rhetoric_comments)
        suggestions = [group[0] for group in groups if group]
        return f"{prefix}{';'.join(suggestions)}"
|
||||
|
||||
|
||||
# ==================== API路由定义 ====================
|
||||
|
||||
# Router for the essay review endpoints; every route is served under /api/v1.
router = APIRouter(prefix="/api/v1", tags=["作文批改"])
|
||||
# Single scoring engine instance, created when this module is imported.
_scoring_engine = EssayScoringEngine()
|
||||
|
||||
|
||||
@router.post("/essay/review")
async def review_essay(request: EssayReviewRequest):
    """Score and review an essay.

    POST /api/v1/essay/review

    Takes the OCR-recognised essay text and returns the total score, the
    per-dimension analysis and the revision suggestions, wrapped as
    ``{"code": 200, "msg": "success", "data": result}``.

    Raises:
        HTTPException: with status 500 when the review fails unexpectedly.
    """
    try:
        result = _scoring_engine.review_essay(request)

        # Audit log entry.
        logger.info(
            "作文批改完成: score=%s/%s, student=%s, assignment=%s, chars=%s, time=%sms",
            result['total_score'], request.max_score,
            request.student_id, request.assignment_id,
            result['character_count']['chinese'], result['inference_time_ms'],
        )
        return {"code": 200, "msg": "success", "data": result}
    except HTTPException:
        # Pass deliberate HTTP errors through instead of rewrapping them as 500.
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("作文批改异常: %s", e)
        raise HTTPException(status_code=500, detail=f"作文批改服务异常: {str(e)}") from e
|
||||
Reference in New Issue
Block a user