# Ziranxie Teaching Data Analytics and Learning Diagnosis System V1.0
# analytics/writing_growth.py - handwriting growth evaluation engine

import logging
import math
from typing import Any, Dict, List, Optional, Tuple
from datetime import datetime, date, timedelta
from dataclasses import dataclass, field

logger = logging.getLogger("writech.analytics.writing_growth")


# ============================================================
# Handwriting growth data models
# ============================================================

@dataclass
class WritingSnapshot:
    """Aggregated handwriting metrics for one time period."""

    # Period key: an ISO day, the Monday of an ISO week, or "YYYY-MM".
    date: str
    stroke_order_accuracy: float = 0.0
    writing_quality: float = 0.0
    writing_speed: float = 0.0
    char_structure: float = 0.0
    practice_count: int = 0
    total_chars: int = 0


@dataclass
class CharacterProgress:
    """Progress record for a single practised character."""

    character: str
    first_score: float
    latest_score: float
    best_score: float
    practice_count: int
    improvement: float  # latest_score - first_score
    mastery_level: str  # beginner / intermediate / advanced / master


@dataclass
class WritingGrowthReport:
    """Handwriting growth evaluation report for one student and period."""

    student_id: str
    period_start: str
    period_end: str

    # Overall rating; overall_level is one of the LEVEL_THRESHOLDS keys.
    overall_level: str = ""
    overall_score: float = 0.0
    overall_trend: str = "stable"

    # Per-dimension score and trend (improving / stable / declining).
    stroke_order_score: float = 0.0
    stroke_order_trend: str = "stable"
    quality_score: float = 0.0
    quality_trend: str = "stable"
    speed_score: float = 0.0
    speed_trend: str = "stable"
    structure_score: float = 0.0
    structure_trend: str = "stable"

    # Time series of aggregated snapshots.
    snapshots: List[WritingSnapshot] = field(default_factory=list)

    # Per-character rankings.
    most_improved_chars: List[CharacterProgress] = field(
        default_factory=list
    )
    needs_practice_chars: List[CharacterProgress] = field(
        default_factory=list
    )

    # Practice statistics.
    total_practice_sessions: int = 0
    total_characters_written: int = 0
    avg_daily_practice_minutes: float = 0.0

    # ISO timestamp of when the report was generated.
    analyzed_at: str = ""


# ============================================================
# Handwriting growth evaluation engine
# ============================================================

class WritingGrowthAnalyzer:
    """
    Handwriting growth evaluation engine.

    Features:
    1. Multi-dimensional scoring (stroke order, quality, speed, structure)
    2. Growth trend analysis (exponential moving average smoothing)
    3. Per-character progress tracking
    4. Handwriting level grading
    5. Handwriting problem diagnosis
    """

    # Minimum overall score required for each handwriting level.
    LEVEL_THRESHOLDS = {
        "精通": 95.0,
        "优秀": 85.0,
        "进阶": 70.0,
        "入门": 50.0,
        "初学": 0.0,
    }

    # Weight of each dimension in the overall score.
    WEIGHTS = {
        "stroke_order": 0.25,
        "quality": 0.35,
        "speed": 0.15,
        "structure": 0.25,
    }

    # Smoothing factor of the exponential moving average.
    EMA_ALPHA = 0.3
    # Minimum gap between early and recent EMA means that counts as a trend.
    TREND_DELTA = 3.0
    # Minimum regression slope (score points per period) that counts as a
    # trend for the overall score.
    SLOPE_THRESHOLD = 0.5
    # Estimated practice minutes spent per written character.
    MINUTES_PER_CHAR = 0.5

    def __init__(self):
        logger.info("书写成长评测引擎初始化")

    async def analyze_growth(
        self,
        student_id: str,
        start_date: str,
        end_date: str,
        granularity: str = "weekly",
    ) -> WritingGrowthReport:
        """
        Analyse a student's handwriting growth over a period.

        Args:
            student_id: Student identifier.
            start_date: First day of the analysis period (ISO format).
            end_date: Last day of the analysis period (ISO format).
            granularity: Aggregation period: daily / weekly / monthly.
                Any other value is treated as daily.

        Returns:
            The handwriting growth report.

        Raises:
            ValueError: If start_date or end_date is not an ISO date string.
        """
        logger.info(
            "书写成长分析: student=%s, %s~%s, 粒度=%s",
            student_id, start_date, end_date, granularity,
        )

        # 1. Fetch the raw handwriting scores.
        raw_data = await self._fetch_writing_scores(
            student_id, start_date, end_date
        )

        # 2. Aggregate them by the requested period.
        snapshots = self._aggregate_by_period(raw_data, granularity)

        # 3. Score and trend for each dimension.
        stroke_score, stroke_trend = self._calc_dimension_trend(
            [s.stroke_order_accuracy for s in snapshots]
        )
        quality_score, quality_trend = self._calc_dimension_trend(
            [s.writing_quality for s in snapshots]
        )
        speed_score, speed_trend = self._calc_dimension_trend(
            [s.writing_speed for s in snapshots]
        )
        structure_score, structure_trend = self._calc_dimension_trend(
            [s.char_structure for s in snapshots]
        )

        # 4. Overall score, level and trend.
        overall_score = self._calc_overall_score(
            stroke_score, quality_score, speed_score, structure_score
        )
        overall_level = self._determine_level(overall_score)
        overall_trend = self._determine_overall_trend(snapshots)

        # 5. Per-character progress.
        char_data = await self._fetch_character_scores(
            student_id, start_date, end_date
        )
        most_improved, needs_practice = self._analyze_char_progress(
            char_data
        )

        # 6. Practice statistics.
        total_sessions = sum(s.practice_count for s in snapshots)
        total_chars = sum(s.total_chars for s in snapshots)
        days = self._period_days(start_date, end_date)
        # Rough estimate of daily practice minutes from the character count.
        avg_daily = total_chars / days * self.MINUTES_PER_CHAR

        return WritingGrowthReport(
            student_id=student_id,
            period_start=start_date,
            period_end=end_date,
            overall_level=overall_level,
            overall_score=round(overall_score, 1),
            overall_trend=overall_trend,
            stroke_order_score=round(stroke_score, 1),
            stroke_order_trend=stroke_trend,
            quality_score=round(quality_score, 1),
            quality_trend=quality_trend,
            speed_score=round(speed_score, 1),
            speed_trend=speed_trend,
            structure_score=round(structure_score, 1),
            structure_trend=structure_trend,
            snapshots=snapshots,
            most_improved_chars=most_improved[:10],
            needs_practice_chars=needs_practice[:10],
            total_practice_sessions=total_sessions,
            total_characters_written=total_chars,
            avg_daily_practice_minutes=round(avg_daily, 1),
            analyzed_at=datetime.now().isoformat(),
        )

    @staticmethod
    def _period_days(start_date: str, end_date: str) -> int:
        """Return the number of calendar days in [start_date, end_date].

        Both endpoints are counted, so a one-week range Monday..Sunday is
        7 days and a single-day range is 1 day. A reversed range is clamped
        to 1 so the caller never divides by zero or a negative number.
        """
        first = datetime.fromisoformat(start_date).date()
        last = datetime.fromisoformat(end_date).date()
        return max((last - first).days + 1, 1)

    @staticmethod
    def _parse_record_date(value: Any) -> Optional[date]:
        """Normalise a record's date field to a date, or None if unusable.

        Accepts date / datetime objects as well as ISO-format strings.
        """
        # datetime is a subclass of date, so it must be checked first.
        if isinstance(value, datetime):
            return value.date()
        if isinstance(value, date):
            return value
        if isinstance(value, str) and value:
            try:
                return datetime.fromisoformat(value).date()
            except ValueError:
                return None
        return None

    async def _fetch_writing_scores(
        self, student_id: str, start: str, end: str
    ) -> List[Dict[str, Any]]:
        """Fetch raw handwriting score rows for a student and date range.

        Currently a placeholder that always returns an empty list; the
        commented query below sketches the intended ClickHouse lookup.
        """
        # query = """
        #     SELECT date, stroke_order_accuracy, writing_quality,
        #            writing_speed, char_structure, practice_count, total_chars
        #     FROM writing_growth
        #     WHERE student_id = %(sid)s
        #       AND date BETWEEN %(start)s AND %(end)s
        #     ORDER BY date
        # """
        return []

    async def _fetch_character_scores(
        self, student_id: str, start: str, end: str
    ) -> List[Dict[str, Any]]:
        """Fetch per-character practice scores for a student and date range.

        Currently a placeholder that always returns an empty list; the
        commented query below sketches the intended lookup.
        """
        # query = """
        #     SELECT character, score, practice_at
        #     FROM practice_records
        #     WHERE student_id = %(sid)s
        #       AND practice_at BETWEEN %(start)s AND %(end)s
        #     ORDER BY character, practice_at
        # """
        return []

    def _aggregate_by_period(
        self,
        raw_data: List[Dict[str, Any]],
        granularity: str,
    ) -> List[WritingSnapshot]:
        """Aggregate raw score records into one snapshot per period.

        Records are bucketed by day, by ISO week (keyed by that week's
        Monday) or by month ("YYYY-MM"). Score dimensions are averaged
        within a bucket; practice_count and total_chars are summed.
        Records whose date is missing or unparseable are skipped, because
        they cannot be assigned to any period.

        Returns:
            Snapshots sorted by period key (chronological order).
        """
        if not raw_data:
            return []

        period_map: Dict[str, List[Dict[str, Any]]] = {}
        skipped = 0
        for record in raw_data:
            day = self._parse_record_date(record.get("date"))
            if day is None:
                skipped += 1
                continue

            if granularity == "weekly":
                # weekday() is 0 for Monday, so this rewinds to the Monday.
                period_key = (day - timedelta(days=day.weekday())).isoformat()
            elif granularity == "monthly":
                period_key = day.isoformat()[:7]  # YYYY-MM
            else:
                period_key = day.isoformat()

            period_map.setdefault(period_key, []).append(record)

        if skipped:
            logger.warning("跳过 %d 条日期无效的书写记录", skipped)

        snapshots: List[WritingSnapshot] = []
        # ISO keys sort lexicographically in chronological order.
        for period, records in sorted(period_map.items()):
            count = len(records)

            def mean_of(key: str) -> float:
                return sum(r.get(key, 0) for r in records) / count

            snapshots.append(WritingSnapshot(
                date=period,
                stroke_order_accuracy=mean_of("stroke_order_accuracy"),
                writing_quality=mean_of("writing_quality"),
                writing_speed=mean_of("writing_speed"),
                char_structure=mean_of("char_structure"),
                practice_count=sum(
                    r.get("practice_count", 0) for r in records
                ),
                total_chars=sum(r.get("total_chars", 0) for r in records),
            ))

        return snapshots

    def _calc_dimension_trend(
        self, values: List[float]
    ) -> Tuple[float, str]:
        """
        Compute the current score and trend of one dimension.

        The series is smoothed with an exponential moving average (EMA).
        The last EMA value is the current score. With at least four points,
        the mean EMA of the later half is compared against the earlier half
        to classify the trend; shorter series are reported as "stable".

        Returns:
            (current_score, trend), trend being one of
            "improving" / "declining" / "stable". Empty input yields
            (0.0, "stable").
        """
        if not values:
            return 0.0, "stable"

        alpha = self.EMA_ALPHA
        ema_values = [values[0]]
        for value in values[1:]:
            ema_values.append(alpha * value + (1 - alpha) * ema_values[-1])

        current_score = ema_values[-1]

        if len(ema_values) < 4:
            return current_score, "stable"

        mid = len(ema_values) // 2
        early_avg = sum(ema_values[:mid]) / mid
        recent_avg = sum(ema_values[mid:]) / (len(ema_values) - mid)
        diff = recent_avg - early_avg

        if diff > self.TREND_DELTA:
            trend = "improving"
        elif diff < -self.TREND_DELTA:
            trend = "declining"
        else:
            trend = "stable"

        return current_score, trend

    def _calc_overall_score(
        self,
        stroke: float,
        quality: float,
        speed: float,
        structure: float,
    ) -> float:
        """Return the WEIGHTS-weighted sum of the four dimension scores."""
        return (
            stroke * self.WEIGHTS["stroke_order"]
            + quality * self.WEIGHTS["quality"]
            + speed * self.WEIGHTS["speed"]
            + structure * self.WEIGHTS["structure"]
        )

    def _determine_level(self, score: float) -> str:
        """Map an overall score to the highest level whose threshold it meets.

        Thresholds are sorted here so the result does not depend on the
        declaration order of LEVEL_THRESHOLDS. Scores below every threshold
        fall back to the lowest level name.
        """
        ranked = sorted(
            self.LEVEL_THRESHOLDS.items(),
            key=lambda item: item[1],
            reverse=True,
        )
        for level, threshold in ranked:
            if score >= threshold:
                return level
        return "初学"

    def _determine_overall_trend(
        self, snapshots: List[WritingSnapshot]
    ) -> str:
        """Classify the overall trend across snapshots.

        Computes each snapshot's weighted overall score and fits a
        least-squares line against the snapshot index; the slope decides
        between "improving", "declining" and "stable". Fewer than two
        snapshots are reported as "stable".
        """
        if len(snapshots) < 2:
            return "stable"

        scores = [
            self._calc_overall_score(
                s.stroke_order_accuracy,
                s.writing_quality,
                s.writing_speed,
                s.char_structure,
            )
            for s in snapshots
        ]

        # Simple linear regression slope over x = 0..n-1.
        n = len(scores)
        x_mean = (n - 1) / 2
        y_mean = sum(scores) / n

        numerator = sum(
            (i - x_mean) * (y - y_mean) for i, y in enumerate(scores)
        )
        denominator = sum((i - x_mean) ** 2 for i in range(n))

        if denominator == 0:
            return "stable"

        slope = numerator / denominator

        if slope > self.SLOPE_THRESHOLD:
            return "improving"
        if slope < -self.SLOPE_THRESHOLD:
            return "declining"
        return "stable"

    def _analyze_char_progress(
        self, char_data: List[Dict[str, Any]]
    ) -> Tuple[List[CharacterProgress], List[CharacterProgress]]:
        """
        Analyse per-character progress.

        For every practised character the first and latest scores (ordered
        by practice_at) are compared.

        Returns:
            (most_improved, needs_practice):
            most_improved holds every character, sorted by improvement in
            descending order; needs_practice holds characters whose latest
            score is below 70 after more than 3 practices, lowest latest
            score first.
        """
        char_map: Dict[str, List[Tuple[float, str]]] = {}
        for record in char_data:
            char = record.get("character", "")
            score = record.get("score", 0.0)
            practice_at = record.get("practice_at", "")
            char_map.setdefault(char, []).append((score, practice_at))

        progress_list: List[CharacterProgress] = []
        for char, entries in char_map.items():
            # Chronological order; the sort is stable for equal timestamps.
            entries.sort(key=lambda e: e[1])

            first_score = entries[0][0]
            latest_score = entries[-1][0]
            best_score = max(e[0] for e in entries)
            improvement = latest_score - first_score

            # Mastery level is judged on the latest score only.
            if latest_score >= 90:
                level = "master"
            elif latest_score >= 75:
                level = "advanced"
            elif latest_score >= 60:
                level = "intermediate"
            else:
                level = "beginner"

            progress_list.append(CharacterProgress(
                character=char,
                first_score=first_score,
                latest_score=latest_score,
                best_score=best_score,
                practice_count=len(entries),
                improvement=round(improvement, 1),
                mastery_level=level,
            ))

        most_improved = sorted(
            progress_list, key=lambda p: p.improvement, reverse=True
        )

        needs_practice = sorted(
            [
                p for p in progress_list
                if p.latest_score < 70 and p.practice_count > 3
            ],
            key=lambda p: p.latest_score,
        )

        return most_improved, needs_practice