# Files
# system-design/software-copyright/03-writech-learning-analytics/analytics/writing_growth.py
# T
# 2026-03-22 15:24:40 +08:00

# 461 lines
# 14 KiB
# Python
# Raw Blame History

# This file contains ambiguous Unicode characters
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# 自然写教学数据分析与学情诊断系统软件 V1.0
# analytics/writing_growth.py - 书写能力成长评测引擎
import logging
import math
from typing import Any, Dict, List, Optional, Tuple
from datetime import datetime, date, timedelta
from dataclasses import dataclass, field
logger = logging.getLogger("writech.analytics.writing_growth")
# ============================================================
# 书写成长数据模型
# ============================================================
@dataclass
class WritingSnapshot:
    """Writing-ability metrics for a single time slice.

    The analyzer's aggregation step creates one instance per period and
    fills the four score fields with per-period averages and the two
    counters with per-period totals.
    """

    # Period label; the aggregation step stores the grouping key here
    # (week-start date, "YYYY-MM", or the raw record date).
    date: str

    # Averaged dimension scores for the period.
    stroke_order_accuracy: float = field(default=0.0)
    writing_quality: float = field(default=0.0)
    writing_speed: float = field(default=0.0)
    char_structure: float = field(default=0.0)

    # Summed practice counters for the period.
    practice_count: int = field(default=0)
    total_chars: int = field(default=0)
@dataclass
class CharacterProgress:
    """Progress record for one practised character.

    Every field is required; the analyzer derives them from the
    time-ordered practice scores of that character.
    """

    # The character this record describes.
    character: str

    # Score of the earliest / most recent / highest-scoring attempt.
    first_score: float
    latest_score: float
    best_score: float

    # Number of recorded attempts.
    practice_count: int

    # latest_score - first_score
    improvement: float

    # One of: beginner / intermediate / advanced / master
    mastery_level: str
@dataclass
class WritingGrowthReport:
    """Writing-growth evaluation report for one student and period.

    Only the student id and the period bounds are required; every other
    field has a neutral default so the analyzer can fill them in as it
    computes them.
    """

    student_id: str
    period_start: str
    period_end: str

    # --- Overall rating ---
    # Level label, one of: 初学 / 入门 / 进阶 / 优秀 / 精通
    overall_level: str = field(default="")
    overall_score: float = field(default=0.0)
    overall_trend: str = field(default="stable")

    # --- Per-dimension score and trend ---
    stroke_order_score: float = field(default=0.0)
    stroke_order_trend: str = field(default="stable")
    quality_score: float = field(default=0.0)
    quality_trend: str = field(default="stable")
    speed_score: float = field(default=0.0)
    speed_trend: str = field(default="stable")
    structure_score: float = field(default=0.0)
    structure_trend: str = field(default="stable")

    # --- Time series of aggregated snapshots ---
    snapshots: List[WritingSnapshot] = field(default_factory=list)

    # --- Per-character rankings ---
    most_improved_chars: List[CharacterProgress] = field(default_factory=list)
    needs_practice_chars: List[CharacterProgress] = field(default_factory=list)

    # --- Practice statistics ---
    total_practice_sessions: int = field(default=0)
    total_characters_written: int = field(default=0)
    avg_daily_practice_minutes: float = field(default=0.0)

    # Timestamp string recording when the report was generated.
    analyzed_at: str = field(default="")
# ============================================================
# 书写成长评测引擎
# ============================================================
class WritingGrowthAnalyzer:
    """Writing-ability growth evaluation engine.

    Capabilities:
        1. Multi-dimension scoring (stroke order, quality, speed, structure).
        2. Growth-trend analysis, using an exponential moving average to
           smooth noise.
        3. Per-character progress tracking.
        4. Writing-level grading.
        5. Practice diagnosis (characters that still need work).
    """

    # Minimum overall score required to reach each writing level.
    LEVEL_THRESHOLDS = {
        "精通": 95.0,
        "优秀": 85.0,
        "进阶": 70.0,
        "入门": 50.0,
        "初学": 0.0,
    }

    # Weight of each dimension in the overall score.
    WEIGHTS = {
        "stroke_order": 0.25,
        "quality": 0.35,
        "speed": 0.15,
        "structure": 0.25,
    }

    # Smoothing factor of the exponential moving average.
    EMA_ALPHA = 0.3
    # Minimum gap between late and early EMA means to call a dimension trend.
    DIMENSION_TREND_DELTA = 3
    # Minimum per-period regression slope to call an overall trend.
    OVERALL_TREND_SLOPE = 0.5
    # Estimated practice minutes spent per written character.
    MINUTES_PER_CHAR = 0.5

    def __init__(self):
        logger.info("书写成长评测引擎初始化")

    async def analyze_growth(
        self,
        student_id: str,
        start_date: str,
        end_date: str,
        granularity: str = "weekly",
    ) -> "WritingGrowthReport":
        """Build the writing-growth report for one student and period.

        Args:
            student_id: Student identifier.
            start_date: ISO-format start of the analysis period.
            end_date: ISO-format end of the analysis period (inclusive).
            granularity: "daily", "weekly" or "monthly"; any other value
                is grouped like "daily".

        Returns:
            The populated WritingGrowthReport.

        Raises:
            ValueError: If start_date or end_date is not ISO-formatted.
        """
        logger.info(
            "书写成长分析: student=%s, %s~%s, 粒度=%s",
            student_id, start_date, end_date, granularity,
        )

        # 1. Fetch the raw writing scores.
        raw_data = await self._fetch_writing_scores(
            student_id, start_date, end_date
        )

        # 2. Aggregate them per period.
        snapshots = self._aggregate_by_period(raw_data, granularity)

        # 3. Score and trend of every dimension.
        stroke_score, stroke_trend = self._calc_dimension_trend(
            [s.stroke_order_accuracy for s in snapshots]
        )
        quality_score, quality_trend = self._calc_dimension_trend(
            [s.writing_quality for s in snapshots]
        )
        speed_score, speed_trend = self._calc_dimension_trend(
            [s.writing_speed for s in snapshots]
        )
        structure_score, structure_trend = self._calc_dimension_trend(
            [s.char_structure for s in snapshots]
        )

        # 4. Overall score, level and trend.
        overall_score = self._calc_overall_score(
            stroke_score, quality_score, speed_score, structure_score
        )
        overall_level = self._determine_level(overall_score)
        overall_trend = self._determine_overall_trend(snapshots)

        # 5. Per-character progress.
        char_data = await self._fetch_character_scores(
            student_id, start_date, end_date
        )
        most_improved, needs_practice = self._analyze_char_progress(
            char_data
        )

        # 6. Practice statistics.
        total_sessions = sum(s.practice_count for s in snapshots)
        total_chars = sum(s.total_chars for s in snapshots)
        days = self._count_period_days(start_date, end_date)
        # Rough estimate of the daily practice time in minutes.
        avg_daily = total_chars / days * self.MINUTES_PER_CHAR

        return WritingGrowthReport(
            student_id=student_id,
            period_start=start_date,
            period_end=end_date,
            overall_level=overall_level,
            overall_score=round(overall_score, 1),
            overall_trend=overall_trend,
            stroke_order_score=round(stroke_score, 1),
            stroke_order_trend=stroke_trend,
            quality_score=round(quality_score, 1),
            quality_trend=quality_trend,
            speed_score=round(speed_score, 1),
            speed_trend=speed_trend,
            structure_score=round(structure_score, 1),
            structure_trend=structure_trend,
            snapshots=snapshots,
            most_improved_chars=most_improved[:10],
            needs_practice_chars=needs_practice[:10],
            total_practice_sessions=total_sessions,
            total_characters_written=total_chars,
            avg_daily_practice_minutes=round(avg_daily, 1),
            analyzed_at=datetime.now().isoformat(),
        )

    @staticmethod
    def _count_period_days(start_date: str, end_date: str) -> int:
        """Return the number of calendar days in the inclusive period.

        Both bounds are counted, so a period that starts and ends on the
        same day is one day long. Only the calendar dates are compared, so
        a time-of-day component in either string does not change the
        count. The result is never below 1, which keeps it safe to use as
        a divisor even for a reversed range.

        Raises:
            ValueError: If either string is not ISO-formatted.
        """
        first_day = datetime.fromisoformat(start_date).date()
        last_day = datetime.fromisoformat(end_date).date()
        return max((last_day - first_day).days + 1, 1)

    async def _fetch_writing_scores(
        self, student_id: str, start: str, end: str
    ) -> List[Dict[str, Any]]:
        """Fetch the raw writing-score rows (intended source: ClickHouse).

        Currently a stub that always returns an empty list; the intended
        query is kept below for reference.
        """
        # query = """
        # SELECT date, stroke_order_accuracy, writing_quality,
        # writing_speed, char_structure, practice_count, total_chars
        # FROM writing_growth
        # WHERE student_id = %(sid)s
        # AND date BETWEEN %(start)s AND %(end)s
        # ORDER BY date
        # """
        return []

    async def _fetch_character_scores(
        self, student_id: str, start: str, end: str
    ) -> List[Dict[str, Any]]:
        """Fetch the per-character practice scores.

        Currently a stub that always returns an empty list; the intended
        query is kept below for reference.
        """
        # query = """
        # SELECT character, score, practice_at
        # FROM practice_records
        # WHERE student_id = %(sid)s
        # AND practice_at BETWEEN %(start)s AND %(end)s
        # ORDER BY character, practice_at
        # """
        return []

    @staticmethod
    def _period_key(raw_date: Any, granularity: str) -> Optional[str]:
        """Map a record date to the key of the period it belongs to.

        Args:
            raw_date: ISO date string, or a date/datetime object.
            granularity: "weekly" yields the ISO date of that week's
                Monday, "monthly" yields "YYYY-MM", anything else yields
                the date string unchanged.

        Returns:
            The period key, or None when the date is missing, empty, of an
            unsupported type, or (weekly only) not parseable.
        """
        # Accept date objects as well as strings.
        if isinstance(raw_date, datetime):
            raw_date = raw_date.date()
        if isinstance(raw_date, date):
            raw_date = raw_date.isoformat()
        if not isinstance(raw_date, str) or not raw_date:
            return None

        if granularity == "weekly":
            try:
                day = datetime.fromisoformat(raw_date)
            except ValueError:
                return None
            monday = day - timedelta(days=day.weekday())
            return monday.date().isoformat()
        if granularity == "monthly":
            return raw_date[:7]  # YYYY-MM
        return raw_date

    @staticmethod
    def _sum_field(records: List[Dict[str, Any]], key: str) -> float:
        """Sum one field over the records, counting missing/None as 0."""
        return sum(r.get(key) or 0 for r in records)

    def _aggregate_by_period(
        self,
        raw_data: List[Dict[str, Any]],
        granularity: str,
    ) -> "List[WritingSnapshot]":
        """Aggregate raw score rows into one snapshot per period.

        Score dimensions are averaged and practice counters are summed
        within each period. Records whose date cannot be mapped to a
        period are skipped with a warning instead of aborting the run.

        Returns:
            Snapshots sorted by period key; empty when there is no data.
        """
        if not raw_data:
            return []

        # Group the records by period key.
        period_map: Dict[str, List[Dict[str, Any]]] = {}
        for record in raw_data:
            period_key = self._period_key(record.get("date"), granularity)
            if period_key is None:
                logger.warning(
                    "跳过日期无效的书写记录: %r", record.get("date")
                )
                continue
            period_map.setdefault(period_key, []).append(record)

        # Collapse each group into one snapshot.
        snapshots: List[WritingSnapshot] = []
        for period, records in sorted(period_map.items()):
            n = len(records)
            snapshots.append(WritingSnapshot(
                date=period,
                stroke_order_accuracy=self._sum_field(
                    records, "stroke_order_accuracy"
                ) / n,
                writing_quality=self._sum_field(
                    records, "writing_quality"
                ) / n,
                writing_speed=self._sum_field(records, "writing_speed") / n,
                char_structure=self._sum_field(records, "char_structure") / n,
                practice_count=self._sum_field(records, "practice_count"),
                total_chars=self._sum_field(records, "total_chars"),
            ))
        return snapshots

    def _calc_dimension_trend(
        self, values: List[float]
    ) -> Tuple[float, str]:
        """Return the current score and trend of one dimension.

        The series is smoothed with an exponential moving average (EMA);
        the last EMA value is the current score. With at least four points
        the mean of the later half of the EMA series is compared with the
        mean of the earlier half to decide the trend.

        Returns:
            (score, trend) where trend is "improving", "declining" or
            "stable"; (0.0, "stable") for an empty series.
        """
        if not values:
            return 0.0, "stable"

        alpha = self.EMA_ALPHA
        ema_values = [values[0]]
        for value in values[1:]:
            ema_values.append(alpha * value + (1 - alpha) * ema_values[-1])
        current_score = ema_values[-1]

        # Too few points to split into two meaningful halves.
        if len(ema_values) < 4:
            return current_score, "stable"

        mid = len(ema_values) // 2
        early_avg = sum(ema_values[:mid]) / mid
        recent_avg = sum(ema_values[mid:]) / (len(ema_values) - mid)
        diff = recent_avg - early_avg
        if diff > self.DIMENSION_TREND_DELTA:
            return current_score, "improving"
        if diff < -self.DIMENSION_TREND_DELTA:
            return current_score, "declining"
        return current_score, "stable"

    def _calc_overall_score(
        self,
        stroke: float,
        quality: float,
        speed: float,
        structure: float,
    ) -> float:
        """Return the weighted overall writing score (see WEIGHTS)."""
        return (
            stroke * self.WEIGHTS["stroke_order"]
            + quality * self.WEIGHTS["quality"]
            + speed * self.WEIGHTS["speed"]
            + structure * self.WEIGHTS["structure"]
        )

    def _determine_level(self, score: float) -> str:
        """Return the highest writing level whose threshold the score meets.

        Thresholds are checked from highest to lowest regardless of the
        order in which LEVEL_THRESHOLDS is written. A score below every
        threshold falls back to "初学".
        """
        ranked = sorted(
            self.LEVEL_THRESHOLDS.items(),
            key=lambda item: item[1],
            reverse=True,
        )
        for level, threshold in ranked:
            if score >= threshold:
                return level
        return "初学"

    def _determine_overall_trend(
        self, snapshots: "List[WritingSnapshot]"
    ) -> str:
        """Classify the overall trend from the per-snapshot overall scores.

        Fits a least-squares line through the overall scores (x is the
        snapshot index) and compares its slope with OVERALL_TREND_SLOPE.

        Returns:
            "improving", "declining" or "stable"; always "stable" with
            fewer than two snapshots.
        """
        if len(snapshots) < 2:
            return "stable"

        # Overall score of every snapshot.
        scores = [
            self._calc_overall_score(
                s.stroke_order_accuracy,
                s.writing_quality,
                s.writing_speed,
                s.char_structure,
            )
            for s in snapshots
        ]

        # Slope of the simple linear regression.
        n = len(scores)
        x_mean = (n - 1) / 2
        y_mean = sum(scores) / n
        numerator = sum(
            (i - x_mean) * (score - y_mean) for i, score in enumerate(scores)
        )
        denominator = sum((i - x_mean) ** 2 for i in range(n))
        if denominator == 0:
            return "stable"

        slope = numerator / denominator
        if slope > self.OVERALL_TREND_SLOPE:
            return "improving"
        if slope < -self.OVERALL_TREND_SLOPE:
            return "declining"
        return "stable"

    @staticmethod
    def _mastery_level(latest_score: float) -> str:
        """Map a character's latest score to its mastery level."""
        if latest_score >= 90:
            return "master"
        if latest_score >= 75:
            return "advanced"
        if latest_score >= 60:
            return "intermediate"
        return "beginner"

    def _analyze_char_progress(
        self, char_data: List[Dict[str, Any]]
    ) -> "Tuple[List[CharacterProgress], List[CharacterProgress]]":
        """Analyze per-character progress.

        For every practised character the first and the latest score are
        compared. Records without a character or without a score are
        ignored, so they cannot distort the first/latest comparison.

        Returns:
            (most_improved, needs_practice): all characters sorted by
            improvement in descending order, and the characters whose
            latest score is below 70 after more than 3 attempts, sorted
            by latest score in ascending order.
        """
        char_map: Dict[str, List[Tuple[float, str]]] = {}
        for record in char_data:
            char = record.get("character")
            score = record.get("score")
            if not char or score is None:
                continue
            practice_at = record.get("practice_at", "")
            char_map.setdefault(char, []).append((score, practice_at))

        progress_list: List[CharacterProgress] = []
        for char, entries in char_map.items():
            # Order the attempts chronologically.
            entries.sort(key=lambda e: e[1])
            first_score = entries[0][0]
            latest_score = entries[-1][0]
            progress_list.append(CharacterProgress(
                character=char,
                first_score=first_score,
                latest_score=latest_score,
                best_score=max(e[0] for e in entries),
                practice_count=len(entries),
                improvement=round(latest_score - first_score, 1),
                mastery_level=self._mastery_level(latest_score),
            ))

        # Largest improvement first.
        most_improved = sorted(
            progress_list, key=lambda p: p.improvement, reverse=True
        )
        # Still weak after repeated practice, weakest first.
        needs_practice = sorted(
            [
                p for p in progress_list
                if p.latest_score < 70 and p.practice_count > 3
            ],
            key=lambda p: p.latest_score,
        )
        return most_improved, needs_practice