software copyright
This commit is contained in:
@@ -0,0 +1,460 @@
|
||||
# 自然写教学数据分析与学情诊断系统软件 V1.0
|
||||
# analytics/writing_growth.py - 书写能力成长评测引擎
|
||||
|
||||
import logging
|
||||
import math
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from datetime import datetime, date, timedelta
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
logger = logging.getLogger("writech.analytics.writing_growth")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 书写成长数据模型
|
||||
# ============================================================
|
||||
|
||||
@dataclass
class WritingSnapshot:
    """Writing-ability metrics for one time slice.

    One instance is produced per aggregation period. Every metric defaults
    to zero so that a snapshot can be created from the period label alone.
    """

    # Label of the period this snapshot covers (the aggregator stores a
    # week-start date, a ``YYYY-MM`` month, or the raw record date here).
    date: str
    # Per-period averages of the four scored dimensions.
    stroke_order_accuracy: float = 0.0
    writing_quality: float = 0.0
    writing_speed: float = 0.0
    char_structure: float = 0.0
    # Per-period totals.
    practice_count: int = 0
    total_chars: int = 0
|
||||
|
||||
|
||||
@dataclass
class CharacterProgress:
    """Progress record for a single practised character."""

    # The character this record describes.
    character: str
    # Score of the earliest, most recent and highest-scoring attempt.
    first_score: float
    latest_score: float
    best_score: float
    # Number of recorded attempts for this character.
    practice_count: int
    # latest_score - first_score (negative when the score dropped).
    improvement: float
    # One of: beginner / intermediate / advanced / master.
    mastery_level: str
|
||||
|
||||
|
||||
@dataclass
class WritingGrowthReport:
    """Writing-growth evaluation report for one student over one period."""

    student_id: str
    period_start: str
    period_end: str

    # --- Overall rating ---------------------------------------------------
    # Level name; one of 初学 / 入门 / 进阶 / 优秀 / 精通
    # (beginner / elementary / intermediate / excellent / mastery).
    overall_level: str = ""
    overall_score: float = 0.0
    overall_trend: str = "stable"

    # --- Per-dimension score and trend -------------------------------------
    stroke_order_score: float = 0.0
    stroke_order_trend: str = "stable"
    quality_score: float = 0.0
    quality_trend: str = "stable"
    speed_score: float = 0.0
    speed_trend: str = "stable"
    structure_score: float = 0.0
    structure_trend: str = "stable"

    # --- Time series ---------------------------------------------------------
    snapshots: List[WritingSnapshot] = field(default_factory=list)

    # --- Per-character rankings ----------------------------------------------
    most_improved_chars: List[CharacterProgress] = field(default_factory=list)
    needs_practice_chars: List[CharacterProgress] = field(default_factory=list)

    # --- Practice statistics -------------------------------------------------
    total_practice_sessions: int = 0
    total_characters_written: int = 0
    avg_daily_practice_minutes: float = 0.0

    # Timestamp at which the report was generated.
    analyzed_at: str = ""
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 书写成长评测引擎
|
||||
# ============================================================
|
||||
|
||||
class WritingGrowthAnalyzer:
    """Writing-ability growth evaluation engine.

    Capabilities:
        1. Multi-dimensional scoring (stroke order, quality, speed,
           structure).
        2. Growth-trend analysis, using an exponential moving average to
           smooth noise.
        3. Per-character progress tracking.
        4. Writing level grading.
        5. Writing-problem diagnosis.
    """

    # Minimum overall score required for each level
    # (精通 mastery, 优秀 excellent, 进阶 intermediate, 入门 elementary,
    # 初学 beginner).
    LEVEL_THRESHOLDS = {
        "精通": 95.0,
        "优秀": 85.0,
        "进阶": 70.0,
        "入门": 50.0,
        "初学": 0.0,
    }

    # Weight of each dimension in the overall score.
    WEIGHTS = {
        "stroke_order": 0.25,
        "quality": 0.35,
        "speed": 0.15,
        "structure": 0.25,
    }

    # Smoothing factor of the exponential moving average.
    EMA_ALPHA = 0.3
    # A dimension is improving/declining when the recent EMA average differs
    # from the early EMA average by more than this many points.
    DIMENSION_TREND_DELTA = 3.0
    # The overall trend is improving/declining when the regression slope
    # (points per snapshot) exceeds this magnitude.
    OVERALL_TREND_SLOPE = 0.5
    # Estimated practice minutes spent per written character.
    MINUTES_PER_CHAR = 0.5

    def __init__(self):
        logger.info("书写成长评测引擎初始化")

    async def analyze_growth(
        self,
        student_id: str,
        start_date: str,
        end_date: str,
        granularity: str = "weekly",
    ) -> "WritingGrowthReport":
        """Analyse how a student's writing ability developed over a period.

        Args:
            student_id: Student identifier.
            start_date: Start of the period, as an ISO-format date string.
            end_date: End of the period, as an ISO-format date string.
            granularity: Aggregation period: ``"weekly"`` or ``"monthly"``;
                any other value groups records by their raw date.

        Returns:
            The assembled writing-growth report. Scores are rounded to one
            decimal place and each character ranking holds at most ten
            entries.

        Raises:
            ValueError: If ``start_date`` or ``end_date`` is not accepted by
                ``datetime.fromisoformat``.
        """
        logger.info(
            "书写成长分析: student=%s, %s~%s, 粒度=%s",
            student_id, start_date, end_date, granularity,
        )

        # Parse the period before any I/O so that a malformed date fails fast.
        # NOTE(review): the difference excludes the end day, while the
        # intended query uses an inclusive BETWEEN -- confirm whether the
        # divisor should be one day larger.
        elapsed = (
            datetime.fromisoformat(end_date)
            - datetime.fromisoformat(start_date)
        )
        period_days = max(elapsed.days, 1)

        # 1. Fetch the raw score rows and aggregate them per period.
        raw_data = await self._fetch_writing_scores(
            student_id, start_date, end_date
        )
        snapshots = self._aggregate_by_period(raw_data, granularity)

        # 2. Score and trend for every dimension.
        stroke_score, stroke_trend = self._calc_dimension_trend(
            [s.stroke_order_accuracy for s in snapshots]
        )
        quality_score, quality_trend = self._calc_dimension_trend(
            [s.writing_quality for s in snapshots]
        )
        speed_score, speed_trend = self._calc_dimension_trend(
            [s.writing_speed for s in snapshots]
        )
        structure_score, structure_trend = self._calc_dimension_trend(
            [s.char_structure for s in snapshots]
        )

        # 3. Overall score, level and trend.
        overall_score = self._calc_overall_score(
            stroke_score, quality_score, speed_score, structure_score
        )
        overall_level = self._determine_level(overall_score)
        overall_trend = self._determine_overall_trend(snapshots)

        # 4. Per-character progress.
        char_data = await self._fetch_character_scores(
            student_id, start_date, end_date
        )
        most_improved, needs_practice = self._analyze_char_progress(char_data)

        # 5. Practice statistics; daily minutes are an estimate derived from
        #    the number of characters written.
        total_sessions = sum(s.practice_count for s in snapshots)
        total_chars = sum(s.total_chars for s in snapshots)
        avg_daily = total_chars / period_days * self.MINUTES_PER_CHAR

        return WritingGrowthReport(
            student_id=student_id,
            period_start=start_date,
            period_end=end_date,
            overall_level=overall_level,
            overall_score=round(overall_score, 1),
            overall_trend=overall_trend,
            stroke_order_score=round(stroke_score, 1),
            stroke_order_trend=stroke_trend,
            quality_score=round(quality_score, 1),
            quality_trend=quality_trend,
            speed_score=round(speed_score, 1),
            speed_trend=speed_trend,
            structure_score=round(structure_score, 1),
            structure_trend=structure_trend,
            snapshots=snapshots,
            most_improved_chars=most_improved[:10],
            needs_practice_chars=needs_practice[:10],
            total_practice_sessions=total_sessions,
            total_characters_written=total_chars,
            avg_daily_practice_minutes=round(avg_daily, 1),
            analyzed_at=datetime.now().isoformat(),
        )

    async def _fetch_writing_scores(
        self, student_id: str, start: str, end: str
    ) -> List[Dict[str, Any]]:
        """Return the raw writing-score rows of a student for a period.

        Placeholder: no query is executed yet and an empty list is always
        returned. The intended ClickHouse query is kept below for reference.
        """
        # query = """
        #     SELECT date, stroke_order_accuracy, writing_quality,
        #            writing_speed, char_structure, practice_count, total_chars
        #     FROM writing_growth
        #     WHERE student_id = %(sid)s
        #       AND date BETWEEN %(start)s AND %(end)s
        #     ORDER BY date
        # """
        return []

    async def _fetch_character_scores(
        self, student_id: str, start: str, end: str
    ) -> List[Dict[str, Any]]:
        """Return the per-character practice scores of a student.

        Placeholder: no query is executed yet and an empty list is always
        returned. The intended query is kept below for reference.
        """
        # query = """
        #     SELECT character, score, practice_at
        #     FROM practice_records
        #     WHERE student_id = %(sid)s
        #       AND practice_at BETWEEN %(start)s AND %(end)s
        #     ORDER BY character, practice_at
        # """
        return []

    @staticmethod
    def _period_key(raw_date: Any, granularity: str) -> Optional[str]:
        """Map a record date to the label of the period it belongs to.

        Args:
            raw_date: ISO-format date string, or a ``date``/``datetime``
                object.
            granularity: ``"weekly"`` yields the ISO date of that week's
                Monday, ``"monthly"`` yields ``YYYY-MM``; anything else
                yields the date string itself.

        Returns:
            The period label, or ``None`` when the date is missing, of an
            unsupported type, or (weekly only) not parseable.
        """
        # ``datetime`` is a subclass of ``date``, so it must be tested first.
        if isinstance(raw_date, datetime):
            date_str = raw_date.date().isoformat()
        elif isinstance(raw_date, date):
            date_str = raw_date.isoformat()
        elif isinstance(raw_date, str):
            date_str = raw_date
        else:
            return None

        if not date_str:
            return None

        if granularity == "weekly":
            try:
                day = datetime.fromisoformat(date_str)
            except ValueError:
                return None
            # weekday() is 0 for Monday, so this steps back to the Monday.
            monday = day - timedelta(days=day.weekday())
            return monday.date().isoformat()
        if granularity == "monthly":
            return date_str[:7]  # YYYY-MM
        return date_str

    @staticmethod
    def _column_sum(records: List[Dict[str, Any]], key: str) -> float:
        """Sum ``key`` over ``records``, counting missing/``None`` as 0."""
        return sum((r.get(key) or 0) for r in records)

    def _aggregate_by_period(
        self,
        raw_data: List[Dict[str, Any]],
        granularity: str,
    ) -> "List[WritingSnapshot]":
        """Aggregate raw score rows into one snapshot per period.

        The four score dimensions are averaged within each period, while
        ``practice_count`` and ``total_chars`` are summed. Rows whose date
        cannot be mapped to a period are skipped with a warning instead of
        aborting the whole analysis.

        Args:
            raw_data: Score rows, each a mapping with a ``date`` entry plus
                the metric entries.
            granularity: Aggregation period, see ``_period_key``.

        Returns:
            Snapshots ordered by ascending period label; empty when there is
            no usable input.
        """
        if not raw_data:
            return []

        grouped: Dict[str, List[Dict[str, Any]]] = {}
        for record in raw_data:
            key = self._period_key(record.get("date"), granularity)
            if key is None:
                logger.warning(
                    "跳过日期无效的书写记录: %r", record.get("date")
                )
                continue
            grouped.setdefault(key, []).append(record)

        snapshots: List[WritingSnapshot] = []
        for period in sorted(grouped):
            records = grouped[period]
            count = len(records)
            snapshots.append(WritingSnapshot(
                date=period,
                stroke_order_accuracy=self._column_sum(
                    records, "stroke_order_accuracy"
                ) / count,
                writing_quality=self._column_sum(
                    records, "writing_quality"
                ) / count,
                writing_speed=self._column_sum(
                    records, "writing_speed"
                ) / count,
                char_structure=self._column_sum(
                    records, "char_structure"
                ) / count,
                practice_count=self._column_sum(records, "practice_count"),
                total_chars=self._column_sum(records, "total_chars"),
            ))
        return snapshots

    def _calc_dimension_trend(
        self, values: List[float]
    ) -> Tuple[float, str]:
        """Compute the current score and the trend of one dimension.

        The series is smoothed with an exponential moving average (EMA).
        The last EMA value is the current score. With at least four points,
        the mean of the second half of the EMA series is compared with the
        mean of the first half to classify the trend.

        Args:
            values: Scores of one dimension in chronological order.

        Returns:
            ``(current_score, trend)`` where ``trend`` is ``"improving"``,
            ``"declining"`` or ``"stable"``. An empty series yields
            ``(0.0, "stable")``.
        """
        if not values:
            return 0.0, "stable"

        alpha = self.EMA_ALPHA
        smoothed = [values[0]]
        for value in values[1:]:
            smoothed.append(alpha * value + (1 - alpha) * smoothed[-1])

        current_score = smoothed[-1]

        # Fewer than four points are too few to call a direction.
        if len(smoothed) < 4:
            return current_score, "stable"

        mid = len(smoothed) // 2
        early_avg = sum(smoothed[:mid]) / mid
        recent_avg = sum(smoothed[mid:]) / (len(smoothed) - mid)
        diff = recent_avg - early_avg

        if diff > self.DIMENSION_TREND_DELTA:
            trend = "improving"
        elif diff < -self.DIMENSION_TREND_DELTA:
            trend = "declining"
        else:
            trend = "stable"
        return current_score, trend

    def _calc_overall_score(
        self,
        stroke: float,
        quality: float,
        speed: float,
        structure: float,
    ) -> float:
        """Return the ``WEIGHTS``-weighted sum of the four dimension scores."""
        weights = self.WEIGHTS
        return (
            stroke * weights["stroke_order"]
            + quality * weights["quality"]
            + speed * weights["speed"]
            + structure * weights["structure"]
        )

    def _determine_level(self, score: float) -> str:
        """Return the highest level whose threshold ``score`` reaches.

        Thresholds are checked from highest to lowest regardless of the
        order in which ``LEVEL_THRESHOLDS`` declares them. A score below
        every threshold falls back to ``"初学"``.
        """
        ranked = sorted(
            self.LEVEL_THRESHOLDS.items(),
            key=lambda item: item[1],
            reverse=True,
        )
        for level, threshold in ranked:
            if score >= threshold:
                return level
        return "初学"

    def _determine_overall_trend(
        self, snapshots: "List[WritingSnapshot]"
    ) -> str:
        """Classify the overall trend across snapshots.

        Each snapshot is reduced to its weighted overall score, then the
        slope of a least-squares line over the snapshot index decides the
        trend.

        Returns:
            ``"improving"``, ``"declining"`` or ``"stable"``; always
            ``"stable"`` with fewer than two snapshots.
        """
        if len(snapshots) < 2:
            return "stable"

        scores = [
            self._calc_overall_score(
                s.stroke_order_accuracy,
                s.writing_quality,
                s.writing_speed,
                s.char_structure,
            )
            for s in snapshots
        ]

        # Least-squares slope with x = 0, 1, ..., n - 1.
        n = len(scores)
        x_mean = (n - 1) / 2
        y_mean = sum(scores) / n
        numerator = sum(
            (i - x_mean) * (y - y_mean) for i, y in enumerate(scores)
        )
        denominator = sum((i - x_mean) ** 2 for i in range(n))
        if denominator == 0:
            return "stable"

        slope = numerator / denominator
        if slope > self.OVERALL_TREND_SLOPE:
            return "improving"
        if slope < -self.OVERALL_TREND_SLOPE:
            return "declining"
        return "stable"

    def _analyze_char_progress(
        self, char_data: List[Dict[str, Any]]
    ) -> "Tuple[List[CharacterProgress], List[CharacterProgress]]":
        """Analyse the progress made on each practised character.

        For every character the first and the most recent score are
        compared. Rows without a character or without a score are ignored.

        Args:
            char_data: Practice rows with ``character``, ``score`` and
                ``practice_at`` entries.

        Returns:
            ``(most_improved, needs_practice)``. ``most_improved`` holds the
            characters whose score actually rose, largest gain first.
            ``needs_practice`` holds characters practised more than three
            times whose latest score is still below 70, lowest score first.
        """
        history: Dict[str, List[Tuple[float, str]]] = {}
        for record in char_data:
            char = record.get("character")
            score = record.get("score")
            if not char or score is None:
                continue
            practice_at = record.get("practice_at", "")
            history.setdefault(char, []).append((score, practice_at))

        progress_list: List[CharacterProgress] = []
        for char, entries in history.items():
            # Chronological order, so index 0 is the first attempt.
            entries.sort(key=lambda entry: entry[1])

            first_score = entries[0][0]
            latest_score = entries[-1][0]
            best_score = max(entry[0] for entry in entries)

            if latest_score >= 90:
                level = "master"
            elif latest_score >= 75:
                level = "advanced"
            elif latest_score >= 60:
                level = "intermediate"
            else:
                level = "beginner"

            progress_list.append(CharacterProgress(
                character=char,
                first_score=first_score,
                latest_score=latest_score,
                best_score=best_score,
                practice_count=len(entries),
                improvement=round(latest_score - first_score, 1),
                mastery_level=level,
            ))

        # A character only counts as improved when its score went up;
        # unchanged or regressed characters are left out of the ranking.
        most_improved = sorted(
            (p for p in progress_list if p.improvement > 0),
            key=lambda p: p.improvement,
            reverse=True,
        )

        needs_practice = sorted(
            (
                p for p in progress_list
                if p.latest_score < 70 and p.practice_count > 3
            ),
            key=lambda p: p.latest_score,
        )

        return most_improved, needs_practice
|
||||
Reference in New Issue
Block a user