software copyright

This commit is contained in:
jiahong
2026-03-22 15:24:40 +08:00
parent e303bb868a
commit 60f336e345
155 changed files with 127262 additions and 0 deletions
@@ -0,0 +1,460 @@
# 自然写教学数据分析与学情诊断系统软件 V1.0
# analytics/writing_growth.py - 书写能力成长评测引擎
import logging
import math
from typing import Any, Dict, List, Optional, Tuple
from datetime import datetime, date, timedelta
from dataclasses import dataclass, field
# Module-level logger, registered under an explicit dotted name rather than __name__.
logger = logging.getLogger("writech.analytics.writing_growth")
# ============================================================
# 书写成长数据模型
# ============================================================
@dataclass
class WritingSnapshot:
    """Writing-ability measurements for a single time slice.

    Only ``date`` is required; every metric defaults to zero so that a
    snapshot can be created before any score is known.
    """

    # Label of the time slice this snapshot belongs to (a date string).
    date: str

    # Per-dimension scores.
    stroke_order_accuracy: float = 0.0
    writing_quality: float = 0.0
    writing_speed: float = 0.0
    char_structure: float = 0.0

    # Practice volume recorded for the slice.
    practice_count: int = 0
    total_chars: int = 0
@dataclass
class CharacterProgress:
    """Progress record for one practiced character.

    All fields are required and are positional in the order declared below.
    """

    # The character this record describes.
    character: str

    # Scores of the first attempt, the latest attempt and the best attempt.
    first_score: float
    latest_score: float
    best_score: float

    # Number of recorded practice attempts.
    practice_count: int

    # Score change over time: latest - first.
    improvement: float

    # One of: beginner / intermediate / advanced / master.
    mastery_level: str
@dataclass
class WritingGrowthReport:
    """Writing-growth evaluation report for one student over one period.

    The student id and the period bounds are required; every other field
    has a neutral default (empty string, zero, "stable" or an empty list).
    """

    student_id: str
    period_start: str
    period_end: str

    # ---- Overall rating ----
    # Grade label, one of: 初学 / 入门 / 进阶 / 优秀 / 精通.
    overall_level: str = ""
    overall_score: float = 0.0
    overall_trend: str = "stable"

    # ---- Score and trend for each dimension ----
    stroke_order_score: float = 0.0
    stroke_order_trend: str = "stable"
    quality_score: float = 0.0
    quality_trend: str = "stable"
    speed_score: float = 0.0
    speed_trend: str = "stable"
    structure_score: float = 0.0
    structure_trend: str = "stable"

    # ---- Time series ----
    snapshots: List[WritingSnapshot] = field(default_factory=list)

    # ---- Per-character rankings ----
    most_improved_chars: List[CharacterProgress] = field(default_factory=list)
    needs_practice_chars: List[CharacterProgress] = field(default_factory=list)

    # ---- Practice statistics ----
    total_practice_sessions: int = 0
    total_characters_written: int = 0
    avg_daily_practice_minutes: float = 0.0

    # ---- Generation timestamp ----
    analyzed_at: str = ""
# ============================================================
# 书写成长评测引擎
# ============================================================
class WritingGrowthAnalyzer:
    """
    Writing-ability growth evaluation engine.

    Features:
    1. Multi-dimensional scoring (stroke order, quality, speed, structure)
    2. Growth-trend analysis (exponential moving average to smooth noise)
    3. Per-character progress tracking
    4. Writing level grading
    5. Flagging characters that still need practice
    """

    # Grade label -> minimum overall score required for that grade.
    LEVEL_THRESHOLDS = {
        "精通": 95.0,
        "优秀": 85.0,
        "进阶": 70.0,
        "入门": 50.0,
        "初学": 0.0,
    }

    # Weight of each dimension in the overall score (the weights sum to 1.0).
    WEIGHTS = {
        "stroke_order": 0.25,
        "quality": 0.35,
        "speed": 0.15,
        "structure": 0.25,
    }

    # Smoothing factor of the exponential moving average.
    EMA_ALPHA = 0.3
    # Minimum number of data points before a dimension trend is reported.
    TREND_MIN_POINTS = 4
    # A dimension is improving/declining when the recent-half EMA mean
    # differs from the early-half EMA mean by more than this many points.
    DIMENSION_TREND_DELTA = 3.0
    # The overall trend is improving/declining when the regression slope
    # (score points per snapshot) exceeds this magnitude.
    OVERALL_TREND_SLOPE = 0.5
    # Rough practice time per written character, used to estimate minutes.
    MINUTES_PER_CHAR = 0.5
    # A character "needs practice" when its latest score is below the max
    # score AND it has been practiced more than the min count.
    NEEDS_PRACTICE_MAX_SCORE = 70.0
    NEEDS_PRACTICE_MIN_COUNT = 3

    def __init__(self):
        logger.info("书写成长评测引擎初始化")

    async def analyze_growth(
        self,
        student_id: str,
        start_date: str,
        end_date: str,
        granularity: str = "weekly",
    ) -> WritingGrowthReport:
        """
        Analyze how a student's writing ability developed over a period.

        Args:
            student_id: Student ID.
            start_date: First day of the analysis period (ISO format).
            end_date: Last day of the analysis period (ISO format, inclusive).
            granularity: Time granularity: "daily", "weekly" or "monthly".
                Any other value is treated like "daily".

        Returns:
            The writing-growth evaluation report.

        Raises:
            ValueError: If ``start_date`` or ``end_date`` is not an ISO date.
        """
        logger.info(
            "书写成长分析: student=%s, %s~%s, 粒度=%s",
            student_id, start_date, end_date, granularity,
        )

        # 1. Fetch the raw writing scores.
        raw_data = await self._fetch_writing_scores(
            student_id, start_date, end_date
        )

        # 2. Aggregate them by time granularity.
        snapshots = self._aggregate_by_period(raw_data, granularity)

        # 3. Score and trend of each dimension.
        stroke_score, stroke_trend = self._calc_dimension_trend(
            [s.stroke_order_accuracy for s in snapshots]
        )
        quality_score, quality_trend = self._calc_dimension_trend(
            [s.writing_quality for s in snapshots]
        )
        speed_score, speed_trend = self._calc_dimension_trend(
            [s.writing_speed for s in snapshots]
        )
        structure_score, structure_trend = self._calc_dimension_trend(
            [s.char_structure for s in snapshots]
        )

        # 4. Overall score, grade and trend.
        overall_score = self._calc_overall_score(
            stroke_score, quality_score, speed_score, structure_score
        )
        overall_level = self._determine_level(overall_score)
        overall_trend = self._determine_overall_trend(snapshots)

        # 5. Per-character progress.
        char_data = await self._fetch_character_scores(
            student_id, start_date, end_date
        )
        most_improved, needs_practice = self._analyze_char_progress(
            char_data
        )

        # 6. Practice statistics.
        total_sessions = sum(s.practice_count for s in snapshots)
        total_chars = sum(s.total_chars for s in snapshots)
        days = self._period_days(start_date, end_date)
        # Estimated practice minutes per day.
        avg_daily = total_chars / days * self.MINUTES_PER_CHAR

        return WritingGrowthReport(
            student_id=student_id,
            period_start=start_date,
            period_end=end_date,
            overall_level=overall_level,
            overall_score=round(overall_score, 1),
            overall_trend=overall_trend,
            stroke_order_score=round(stroke_score, 1),
            stroke_order_trend=stroke_trend,
            quality_score=round(quality_score, 1),
            quality_trend=quality_trend,
            speed_score=round(speed_score, 1),
            speed_trend=speed_trend,
            structure_score=round(structure_score, 1),
            structure_trend=structure_trend,
            snapshots=snapshots,
            most_improved_chars=most_improved[:10],
            needs_practice_chars=needs_practice[:10],
            total_practice_sessions=total_sessions,
            total_characters_written=total_chars,
            avg_daily_practice_minutes=round(avg_daily, 1),
            analyzed_at=datetime.now().isoformat(),
        )

    @staticmethod
    def _period_days(start_date: str, end_date: str) -> int:
        """
        Return the number of calendar days in the inclusive period.

        Both bounds count, so a Monday-to-Sunday period is 7 days. The
        result is never below 1, which also guards the caller's division
        when the bounds are reversed.

        Raises:
            ValueError: If either argument is not an ISO date/datetime.
        """
        first_day = datetime.fromisoformat(start_date).date()
        last_day = datetime.fromisoformat(end_date).date()
        return max((last_day - first_day).days + 1, 1)

    async def _fetch_writing_scores(
        self, student_id: str, start: str, end: str
    ) -> List[Dict[str, Any]]:
        """
        Fetch the raw writing-score rows (intended source: ClickHouse).

        Placeholder: the query below is not executed yet and the method
        currently always returns an empty list.
        """
        # query = """
        #     SELECT date, stroke_order_accuracy, writing_quality,
        #            writing_speed, char_structure, practice_count, total_chars
        #     FROM writing_growth
        #     WHERE student_id = %(sid)s
        #       AND date BETWEEN %(start)s AND %(end)s
        #     ORDER BY date
        # """
        return []

    async def _fetch_character_scores(
        self, student_id: str, start: str, end: str
    ) -> List[Dict[str, Any]]:
        """
        Fetch the per-character practice scores.

        Placeholder: the query below is not executed yet and the method
        currently always returns an empty list.
        """
        # query = """
        #     SELECT character, score, practice_at
        #     FROM practice_records
        #     WHERE student_id = %(sid)s
        #       AND practice_at BETWEEN %(start)s AND %(end)s
        #     ORDER BY character, practice_at
        # """
        return []

    @staticmethod
    def _period_key(date_value: Any, granularity: str) -> Optional[str]:
        """
        Map a record date to the key of the period it falls into.

        Args:
            date_value: An ISO date/datetime string, or a ``date`` /
                ``datetime`` object.
            granularity: "weekly" yields the ISO date of that week's Monday,
                "monthly" yields "YYYY-MM", anything else the ISO date.

        Returns:
            The period key, or None when ``date_value`` cannot be parsed.
        """
        # datetime is a subclass of date, so it has to be tested first.
        if isinstance(date_value, datetime):
            day = date_value.date()
        elif isinstance(date_value, date):
            day = date_value
        else:
            try:
                day = datetime.fromisoformat(str(date_value)).date()
            except ValueError:
                return None

        if granularity == "weekly":
            return (day - timedelta(days=day.weekday())).isoformat()
        if granularity == "monthly":
            return day.isoformat()[:7]
        return day.isoformat()

    def _aggregate_by_period(
        self,
        raw_data: List[Dict[str, Any]],
        granularity: str,
    ) -> List[WritingSnapshot]:
        """
        Aggregate raw score rows into one snapshot per period.

        The four score dimensions are averaged over the rows of a period,
        the practice counters are summed. Missing or None metric values
        count as 0. Rows whose date cannot be parsed are skipped with a
        warning. Snapshots are returned sorted by period key.
        """
        if not raw_data:
            return []

        # Group the rows by period key.
        buckets: Dict[str, List[Dict[str, Any]]] = {}
        for record in raw_data:
            key = self._period_key(record.get("date"), granularity)
            if key is None:
                logger.warning(
                    "跳过日期无效的书写记录: %r", record.get("date")
                )
                continue
            buckets.setdefault(key, []).append(record)

        def total(records: List[Dict[str, Any]], metric: str) -> float:
            # "or 0" also covers keys that are present but hold None.
            return sum((r.get(metric) or 0) for r in records)

        snapshots: List[WritingSnapshot] = []
        for period in sorted(buckets):
            records = buckets[period]
            n = len(records)
            snapshots.append(
                WritingSnapshot(
                    date=period,
                    stroke_order_accuracy=total(
                        records, "stroke_order_accuracy"
                    ) / n,
                    writing_quality=total(records, "writing_quality") / n,
                    writing_speed=total(records, "writing_speed") / n,
                    char_structure=total(records, "char_structure") / n,
                    practice_count=total(records, "practice_count"),
                    total_chars=total(records, "total_chars"),
                )
            )
        return snapshots

    def _calc_dimension_trend(
        self, values: List[float]
    ) -> Tuple[float, str]:
        """
        Compute the current score and the trend of one dimension.

        The series is smoothed with an exponential moving average (EMA).
        The last EMA value is the current score. With at least
        ``TREND_MIN_POINTS`` values the trend compares the mean EMA of the
        first half with the mean EMA of the second half; with fewer values
        the trend is always "stable".

        Returns:
            ``(current_score, trend)`` where trend is "improving",
            "declining" or "stable". Empty input yields ``(0.0, "stable")``.
        """
        if not values:
            return 0.0, "stable"

        alpha = self.EMA_ALPHA
        smoothed = [values[0]]
        for value in values[1:]:
            smoothed.append(alpha * value + (1 - alpha) * smoothed[-1])
        current_score = smoothed[-1]

        if len(smoothed) < self.TREND_MIN_POINTS:
            return current_score, "stable"

        mid = len(smoothed) // 2
        early_avg = sum(smoothed[:mid]) / mid
        recent_avg = sum(smoothed[mid:]) / (len(smoothed) - mid)
        diff = recent_avg - early_avg

        if diff > self.DIMENSION_TREND_DELTA:
            return current_score, "improving"
        if diff < -self.DIMENSION_TREND_DELTA:
            return current_score, "declining"
        return current_score, "stable"

    def _calc_overall_score(
        self,
        stroke: float,
        quality: float,
        speed: float,
        structure: float,
    ) -> float:
        """Return the overall score as the ``WEIGHTS``-weighted sum."""
        return (
            stroke * self.WEIGHTS["stroke_order"]
            + quality * self.WEIGHTS["quality"]
            + speed * self.WEIGHTS["speed"]
            + structure * self.WEIGHTS["structure"]
        )

    def _determine_level(self, score: float) -> str:
        """
        Return the grade label for an overall score.

        The thresholds are checked from highest to lowest regardless of
        the declaration order of ``LEVEL_THRESHOLDS``; the first one the
        score reaches wins. Scores below every threshold get "初学".
        """
        ranked = sorted(
            self.LEVEL_THRESHOLDS.items(),
            key=lambda item: item[1],
            reverse=True,
        )
        for level, threshold in ranked:
            if score >= threshold:
                return level
        return "初学"

    def _determine_overall_trend(
        self, snapshots: List[WritingSnapshot]
    ) -> str:
        """
        Classify the overall trend across the snapshots.

        The weighted overall score of every snapshot is regressed against
        its index (ordinary least squares); a slope above
        ``OVERALL_TREND_SLOPE`` is "improving", below its negative
        "declining", otherwise "stable". Fewer than two snapshots are
        always "stable".
        """
        if len(snapshots) < 2:
            return "stable"

        scores = [
            self._calc_overall_score(
                s.stroke_order_accuracy,
                s.writing_quality,
                s.writing_speed,
                s.char_structure,
            )
            for s in snapshots
        ]

        # Slope of the least-squares line through (index, score).
        n = len(scores)
        x_mean = (n - 1) / 2
        y_mean = sum(scores) / n
        numerator = sum(
            (i - x_mean) * (y - y_mean) for i, y in enumerate(scores)
        )
        denominator = sum((i - x_mean) ** 2 for i in range(n))
        if denominator == 0:
            return "stable"

        slope = numerator / denominator
        if slope > self.OVERALL_TREND_SLOPE:
            return "improving"
        if slope < -self.OVERALL_TREND_SLOPE:
            return "declining"
        return "stable"

    @staticmethod
    def _mastery_level(latest_score: float) -> str:
        """Map a character's latest score to its mastery band."""
        if latest_score >= 90:
            return "master"
        if latest_score >= 75:
            return "advanced"
        if latest_score >= 60:
            return "intermediate"
        return "beginner"

    def _analyze_char_progress(
        self, char_data: List[Dict[str, Any]]
    ) -> Tuple[List[CharacterProgress], List[CharacterProgress]]:
        """
        Analyze per-character progress.

        For every practiced character the first and the latest score are
        compared. Records without a character or without a score are
        ignored.

        Returns:
            ``(most_improved, needs_practice)``:
            * ``most_improved``: every character, sorted by improvement
              (latest - first) in descending order.
            * ``needs_practice``: characters whose latest score is below
              ``NEEDS_PRACTICE_MAX_SCORE`` after more than
              ``NEEDS_PRACTICE_MIN_COUNT`` attempts, lowest score first.
        """
        # NOTE(review): most_improved also contains characters whose score
        # did not rise; callers that need real gains only must filter on
        # improvement > 0.
        history: Dict[str, List[Tuple[float, str]]] = {}
        for record in char_data:
            char = record.get("character")
            score = record.get("score")
            if not char or score is None:
                continue
            # None timestamps become "" so string timestamps stay sortable.
            practiced_at = record.get("practice_at") or ""
            history.setdefault(char, []).append((score, practiced_at))

        progress_list: List[CharacterProgress] = []
        for char, entries in history.items():
            # Chronological order; the sort is stable for equal timestamps.
            entries.sort(key=lambda entry: entry[1])
            first_score = entries[0][0]
            latest_score = entries[-1][0]
            progress_list.append(
                CharacterProgress(
                    character=char,
                    first_score=first_score,
                    latest_score=latest_score,
                    best_score=max(entry[0] for entry in entries),
                    practice_count=len(entries),
                    improvement=round(latest_score - first_score, 1),
                    mastery_level=self._mastery_level(latest_score),
                )
            )

        most_improved = sorted(
            progress_list, key=lambda p: p.improvement, reverse=True
        )
        needs_practice = sorted(
            (
                p for p in progress_list
                if p.latest_score < self.NEEDS_PRACTICE_MAX_SCORE
                and p.practice_count > self.NEEDS_PRACTICE_MIN_COUNT
            ),
            key=lambda p: p.latest_score,
        )
        return most_improved, needs_practice