# 自然写 (Writech) Teaching Data Analytics and Learning Diagnosis System, V1.0
# analytics/writing_growth.py - writing-ability growth evaluation engine
|
||
|
||
import logging
|
||
import math
|
||
from typing import Any, Dict, List, Optional, Tuple
|
||
from datetime import datetime, date, timedelta
|
||
from dataclasses import dataclass, field
|
||
|
||
logger = logging.getLogger("writech.analytics.writing_growth")
|
||
|
||
|
||
# ============================================================
# Writing-growth data models
# ============================================================
|
||
|
||
@dataclass
class WritingSnapshot:
    """Writing-ability measurements for one time slice (one aggregation period)."""

    # Label of the period this snapshot covers (required, no default).
    date: str

    # Per-dimension scores for the period.
    stroke_order_accuracy: float = 0.0
    writing_quality: float = 0.0
    writing_speed: float = 0.0
    char_structure: float = 0.0

    # Practice volume counters for the period.
    practice_count: int = 0
    total_chars: int = 0
|
||
|
||
|
||
@dataclass
class CharacterProgress:
    """Progress record for a single practised character."""

    # The character this record describes.
    character: str

    # Scores of the first and the most recent attempt, plus the best one seen.
    first_score: float
    latest_score: float
    best_score: float

    # Number of recorded attempts for this character.
    practice_count: int

    # Score change over the period: latest - first.
    improvement: float

    # One of: beginner / intermediate / advanced / master.
    mastery_level: str
|
||
|
||
|
||
@dataclass
class WritingGrowthReport:
    """Writing-growth evaluation report for one student over one period."""

    # Identity of the report: who it is about and which period it covers.
    student_id: str
    period_start: str
    period_end: str

    # Overall grading. The level is one of 初学/入门/进阶/优秀/精通.
    overall_level: str = ""
    overall_score: float = 0.0
    overall_trend: str = "stable"

    # Score and trend for each individual dimension.
    stroke_order_score: float = 0.0
    stroke_order_trend: str = "stable"
    quality_score: float = 0.0
    quality_trend: str = "stable"
    speed_score: float = 0.0
    speed_trend: str = "stable"
    structure_score: float = 0.0
    structure_trend: str = "stable"

    # Time-series data the scores were derived from.
    snapshots: List[WritingSnapshot] = field(default_factory=list)

    # Per-character rankings.
    most_improved_chars: List[CharacterProgress] = field(default_factory=list)
    needs_practice_chars: List[CharacterProgress] = field(default_factory=list)

    # Practice statistics.
    total_practice_sessions: int = 0
    total_characters_written: int = 0
    avg_daily_practice_minutes: float = 0.0

    # Timestamp at which the report was generated.
    analyzed_at: str = ""
|
||
|
||
|
||
# ============================================================
# Writing-growth evaluation engine
# ============================================================
|
||
|
||
class WritingGrowthAnalyzer:
    """
    Writing-ability growth evaluation engine.

    Features:
    1. Multi-dimensional writing scores (stroke order, quality, speed,
       structure)
    2. Growth-trend analysis (exponential moving average to smooth noise)
    3. Per-character progress tracking
    4. Writing level grading
    5. Writing problem diagnosis
    """

    # Minimum overall score required to reach each writing level.
    LEVEL_THRESHOLDS = {
        "精通": 95.0,
        "优秀": 85.0,
        "进阶": 70.0,
        "入门": 50.0,
        "初学": 0.0,
    }

    # Weight of each dimension in the overall score.
    WEIGHTS = {
        "stroke_order": 0.25,
        "quality": 0.35,
        "speed": 0.15,
        "structure": 0.25,
    }

    # Smoothing factor of the exponential moving average.
    EMA_ALPHA = 0.3
    # A dimension is improving/declining when the mean EMA of the later half
    # differs from the earlier half by more than this many points.
    TREND_DIFF_THRESHOLD = 3.0
    # The overall trend is improving/declining when the regression slope
    # (points per period) exceeds this magnitude.
    TREND_SLOPE_THRESHOLD = 0.5
    # Factor used to estimate practice minutes from characters written.
    MINUTES_PER_CHAR = 0.5
    # Granularities understood by the aggregation step.
    SUPPORTED_GRANULARITIES = ("daily", "weekly", "monthly")

    def __init__(self):
        logger.info("书写成长评测引擎初始化")

    async def analyze_growth(
        self,
        student_id: str,
        start_date: str,
        end_date: str,
        granularity: str = "weekly",
    ) -> "WritingGrowthReport":
        """
        Analyse a student's writing-ability growth.

        Args:
            student_id: Student ID.
            start_date: ISO-format start of the analysis period.
            end_date: ISO-format end of the analysis period.
            granularity: Time granularity (daily/weekly/monthly). Any other
                value is aggregated per day and logged as a warning.

        Returns:
            The writing-growth evaluation report.

        Raises:
            ValueError: If start_date or end_date is not an ISO-format string.
        """
        logger.info(
            "书写成长分析: student=%s, %s~%s, 粒度=%s",
            student_id, start_date, end_date, granularity,
        )
        if granularity not in self.SUPPORTED_GRANULARITIES:
            logger.warning(
                "Unsupported granularity %r, falling back to daily",
                granularity,
            )

        # 1. Fetch the raw writing scores.
        raw_data = await self._fetch_writing_scores(
            student_id, start_date, end_date
        )

        # 2. Aggregate them by time granularity.
        snapshots = self._aggregate_by_period(raw_data, granularity)

        # 3. Score and trend for every dimension.
        stroke_score, stroke_trend = self._calc_dimension_trend(
            [s.stroke_order_accuracy for s in snapshots]
        )
        quality_score, quality_trend = self._calc_dimension_trend(
            [s.writing_quality for s in snapshots]
        )
        speed_score, speed_trend = self._calc_dimension_trend(
            [s.writing_speed for s in snapshots]
        )
        structure_score, structure_trend = self._calc_dimension_trend(
            [s.char_structure for s in snapshots]
        )

        # 4. Overall score, level and trend.
        overall_score = self._calc_overall_score(
            stroke_score, quality_score, speed_score, structure_score
        )
        overall_level = self._determine_level(overall_score)
        overall_trend = self._determine_overall_trend(snapshots)

        # 5. Per-character progress.
        char_data = await self._fetch_character_scores(
            student_id, start_date, end_date
        )
        most_improved, needs_practice = self._analyze_char_progress(char_data)

        # 6. Practice statistics.
        total_sessions = sum(s.practice_count for s in snapshots)
        total_chars = sum(s.total_chars for s in snapshots)
        elapsed = (
            datetime.fromisoformat(end_date)
            - datetime.fromisoformat(start_date)
        )
        # Clamp to one day so a same-day (or reversed) period cannot divide
        # by zero or yield a negative average.
        # NOTE(review): elapsed.days excludes the end day; if the period is
        # meant to be inclusive this undercounts by one - confirm.
        days = max(elapsed.days, 1)
        avg_daily = total_chars / days * self.MINUTES_PER_CHAR

        return WritingGrowthReport(
            student_id=student_id,
            period_start=start_date,
            period_end=end_date,
            overall_level=overall_level,
            overall_score=round(overall_score, 1),
            overall_trend=overall_trend,
            stroke_order_score=round(stroke_score, 1),
            stroke_order_trend=stroke_trend,
            quality_score=round(quality_score, 1),
            quality_trend=quality_trend,
            speed_score=round(speed_score, 1),
            speed_trend=speed_trend,
            structure_score=round(structure_score, 1),
            structure_trend=structure_trend,
            snapshots=snapshots,
            most_improved_chars=most_improved[:10],
            needs_practice_chars=needs_practice[:10],
            total_practice_sessions=total_sessions,
            total_characters_written=total_chars,
            avg_daily_practice_minutes=round(avg_daily, 1),
            analyzed_at=datetime.now().isoformat(),
        )

    async def _fetch_writing_scores(
        self, student_id: str, start: str, end: str
    ) -> List[Dict[str, Any]]:
        """Fetch raw writing scores from ClickHouse (currently a stub)."""
        # Intended query (not wired up yet, so no rows are returned):
        # query = """
        #     SELECT date, stroke_order_accuracy, writing_quality,
        #            writing_speed, char_structure, practice_count, total_chars
        #     FROM writing_growth
        #     WHERE student_id = %(sid)s
        #       AND date BETWEEN %(start)s AND %(end)s
        #     ORDER BY date
        # """
        return []

    async def _fetch_character_scores(
        self, student_id: str, start: str, end: str
    ) -> List[Dict[str, Any]]:
        """Fetch per-character practice scores (currently a stub)."""
        # Intended query (not wired up yet, so no rows are returned):
        # query = """
        #     SELECT character, score, practice_at
        #     FROM practice_records
        #     WHERE student_id = %(sid)s
        #       AND practice_at BETWEEN %(start)s AND %(end)s
        #     ORDER BY character, practice_at
        # """
        return []

    @staticmethod
    def _period_key(raw_date: Any, granularity: str) -> Optional[str]:
        """
        Map a record date to the key of the period it belongs to.

        Accepts ISO-format strings as well as date/datetime objects. Weekly
        keys are the Monday of the week, monthly keys are ``YYYY-MM`` and
        every other granularity yields the calendar day.

        Returns:
            The period key, or None when the date is missing or unparsable.
        """
        # datetime is a subclass of date, so it has to be checked first.
        if isinstance(raw_date, datetime):
            day = raw_date.date()
        elif isinstance(raw_date, date):
            day = raw_date
        else:
            try:
                day = datetime.fromisoformat(str(raw_date)).date()
            except ValueError:
                return None

        if granularity == "weekly":
            return (day - timedelta(days=day.weekday())).isoformat()
        if granularity == "monthly":
            return day.isoformat()[:7]  # YYYY-MM
        return day.isoformat()

    @staticmethod
    def _total(records: List[Dict[str, Any]], key: str) -> float:
        """Sum ``key`` over the records, counting missing/None values as 0."""
        return sum((r.get(key) or 0) for r in records)

    @staticmethod
    def _mean(records: List[Dict[str, Any]], key: str) -> float:
        """Average ``key`` over a non-empty list of records (None counts as 0)."""
        return sum((r.get(key) or 0) for r in records) / len(records)

    def _aggregate_by_period(
        self,
        raw_data: List[Dict[str, Any]],
        granularity: str,
    ) -> List["WritingSnapshot"]:
        """
        Aggregate raw writing scores into one snapshot per period.

        Scores are averaged and practice counters are summed within each
        period. Records whose date is missing or cannot be parsed are skipped
        (and reported once via a warning) instead of aborting the analysis.

        Returns:
            Snapshots ordered by period key.
        """
        if not raw_data:
            return []

        # Group the records by period key.
        period_map: Dict[str, List[Dict[str, Any]]] = {}
        skipped = 0
        for record in raw_data:
            period_key = self._period_key(record.get("date"), granularity)
            if period_key is None:
                skipped += 1
                continue
            period_map.setdefault(period_key, []).append(record)

        if skipped:
            logger.warning(
                "Skipped %d writing record(s) with a missing or invalid date",
                skipped,
            )

        # Collapse every group into one snapshot.
        snapshots: List["WritingSnapshot"] = []
        for period, records in sorted(period_map.items()):
            snapshots.append(WritingSnapshot(
                date=period,
                stroke_order_accuracy=self._mean(
                    records, "stroke_order_accuracy"
                ),
                writing_quality=self._mean(records, "writing_quality"),
                writing_speed=self._mean(records, "writing_speed"),
                char_structure=self._mean(records, "char_structure"),
                practice_count=self._total(records, "practice_count"),
                total_chars=self._total(records, "total_chars"),
            ))
        return snapshots

    def _calc_dimension_trend(
        self, values: List[float]
    ) -> Tuple[float, str]:
        """
        Compute the current score and trend of one dimension.

        The series is smoothed with an exponential moving average (EMA). The
        last EMA value is the current score; the trend compares the mean EMA
        of the later half with that of the earlier half.

        Returns:
            ``(current_score, trend)`` where trend is "improving",
            "declining" or "stable". An empty series yields
            ``(0.0, "stable")``; fewer than four points are always "stable".
        """
        if not values:
            return 0.0, "stable"

        alpha = self.EMA_ALPHA
        ema = values[0]
        ema_values = [ema]
        for value in values[1:]:
            ema = alpha * value + (1 - alpha) * ema
            ema_values.append(ema)

        current_score = ema_values[-1]

        # Too few points to split into two meaningful halves.
        if len(ema_values) < 4:
            return current_score, "stable"

        mid = len(ema_values) // 2
        early_avg = sum(ema_values[:mid]) / mid
        recent_avg = sum(ema_values[mid:]) / (len(ema_values) - mid)
        diff = recent_avg - early_avg

        if diff > self.TREND_DIFF_THRESHOLD:
            trend = "improving"
        elif diff < -self.TREND_DIFF_THRESHOLD:
            trend = "declining"
        else:
            trend = "stable"
        return current_score, trend

    def _calc_overall_score(
        self,
        stroke: float,
        quality: float,
        speed: float,
        structure: float,
    ) -> float:
        """Return the weighted overall writing score (see WEIGHTS)."""
        return (
            stroke * self.WEIGHTS["stroke_order"]
            + quality * self.WEIGHTS["quality"]
            + speed * self.WEIGHTS["speed"]
            + structure * self.WEIGHTS["structure"]
        )

    def _determine_level(self, score: float) -> str:
        """
        Map an overall score to a writing level.

        Thresholds are checked from highest to lowest regardless of the
        order in which LEVEL_THRESHOLDS declares them; scores below every
        threshold fall back to "初学".
        """
        ranked = sorted(
            self.LEVEL_THRESHOLDS.items(),
            key=lambda item: item[1],
            reverse=True,
        )
        for level, threshold in ranked:
            if score >= threshold:
                return level
        return "初学"

    def _determine_overall_trend(
        self, snapshots: List["WritingSnapshot"]
    ) -> str:
        """
        Determine the overall trend across snapshots.

        Fits a least-squares line through the per-snapshot overall scores
        (x = snapshot index) and classifies its slope. Fewer than two
        snapshots are always "stable".
        """
        if len(snapshots) < 2:
            return "stable"

        # Overall score of every snapshot.
        scores = [
            self._calc_overall_score(
                s.stroke_order_accuracy,
                s.writing_quality,
                s.writing_speed,
                s.char_structure,
            )
            for s in snapshots
        ]

        # Slope of the simple linear regression.
        n = len(scores)
        x_mean = (n - 1) / 2
        y_mean = sum(scores) / n
        numerator = sum(
            (i - x_mean) * (y - y_mean) for i, y in enumerate(scores)
        )
        denominator = sum((i - x_mean) ** 2 for i in range(n))
        if denominator == 0:
            return "stable"

        slope = numerator / denominator
        if slope > self.TREND_SLOPE_THRESHOLD:
            return "improving"
        if slope < -self.TREND_SLOPE_THRESHOLD:
            return "declining"
        return "stable"

    @staticmethod
    def _mastery_level(latest_score: float) -> str:
        """Classify a character's latest score into a mastery level."""
        if latest_score >= 90:
            return "master"
        if latest_score >= 75:
            return "advanced"
        if latest_score >= 60:
            return "intermediate"
        return "beginner"

    def _analyze_char_progress(
        self, char_data: List[Dict[str, Any]]
    ) -> Tuple[List["CharacterProgress"], List["CharacterProgress"]]:
        """
        Analyse per-character progress.

        For every practised character the first and latest scores are
        compared. Records without a character are ignored and a missing
        score counts as 0.0.

        Returns:
            ``(most_improved, needs_practice)``:
            most_improved holds only characters whose (rounded) improvement
            is positive, largest improvement first; needs_practice holds
            characters practised more than three times whose latest score is
            still below 70, lowest score first.
        """
        char_map: Dict[str, List[Tuple[float, str]]] = {}
        for record in char_data:
            char = record.get("character")
            if not char:
                # A record without a character cannot be attributed.
                continue
            score = record.get("score") or 0.0
            practice_at = record.get("practice_at") or ""
            char_map.setdefault(char, []).append((score, practice_at))

        progress_list: List["CharacterProgress"] = []
        for char, entries in char_map.items():
            # Chronological order, so first/last are the oldest/newest attempt.
            entries.sort(key=lambda e: e[1])

            first_score = entries[0][0]
            latest_score = entries[-1][0]
            progress_list.append(CharacterProgress(
                character=char,
                first_score=first_score,
                latest_score=latest_score,
                best_score=max(e[0] for e in entries),
                practice_count=len(entries),
                improvement=round(latest_score - first_score, 1),
                mastery_level=self._mastery_level(latest_score),
            ))

        # Largest improvement first; characters that stagnated or regressed
        # are not "improved" and are therefore left out.
        most_improved = sorted(
            (p for p in progress_list if p.improvement > 0),
            key=lambda p: p.improvement,
            reverse=True,
        )

        # Still needs practice: latest score below 70 after more than 3 tries.
        needs_practice = sorted(
            (
                p for p in progress_list
                if p.latest_score < 70 and p.practice_count > 3
            ),
            key=lambda p: p.latest_score,
        )

        return most_improved, needs_practice
|