# Writech teaching data analytics and learning diagnosis system V1.0
# analytics/student_profiler.py - student profile analysis engine

import logging
import math
from typing import Any, Dict, List, Optional, Tuple
from datetime import datetime, date, timedelta
from dataclasses import dataclass, field

logger = logging.getLogger("writech.analytics.profiler")


# ============================================================
# Profile analysis data models
# ============================================================

@dataclass
class ScoreTrend:
    """A single data point on a student's score trend line."""

    date: str  # first 10 characters of the record timestamp (YYYY-MM-DD)
    score: float  # score normalized to a 0-100 scale
    subject: str
    exam_type: str = ""  # homework/exam/practice


@dataclass
class SubjectAbility:
    """Per-subject ability assessment."""

    subject: str
    overall_score: float = 0.0
    knowledge_coverage: float = 0.0  # knowledge point coverage
    practice_frequency: float = 0.0  # practice frequency (times/week)
    improvement_rate: float = 0.0  # later-half mean minus earlier-half mean
    stability: float = 0.0  # 1 / (1 + std_dev / 10), in (0, 1]


@dataclass
class LearningHabit:
    """Learning habit profile."""

    avg_daily_minutes: float = 0.0
    peak_study_hour: int = 0  # hour of day with the most sessions
    # Average minutes per active day, Monday..Sunday (empty when unknown)
    weekly_pattern: List[float] = field(default_factory=list)
    consistency_score: float = 0.0  # regularity of study time, 0-1
    homework_timeliness: float = 0.0  # on-time homework submission rate


@dataclass
class WritingAbility:
    """Handwriting ability assessment."""

    stroke_order_accuracy: float = 0.0  # stroke order accuracy
    writing_quality: float = 0.0  # writing conformity score
    writing_speed: float = 0.0  # writing speed (characters/minute)
    char_structure_score: float = 0.0  # character structure score
    improvement_trend: str = "stable"  # improving/declining/stable


@dataclass
class ComprehensiveProfile:
    """Comprehensive learning profile of one student."""

    student_id: str
    student_name: str
    class_id: str
    grade: str
    school_id: str
    # Overall scoring
    overall_score: float = 0.0
    rank_in_class: int = 0
    rank_in_grade: int = 0
    percentile: float = 0.0
    # Per-subject abilities
    subject_abilities: List[SubjectAbility] = field(default_factory=list)
    # Learning habits
    learning_habit: Optional[LearningHabit] = None
    # Handwriting ability
    writing_ability: Optional[WritingAbility] = None
    # Score trends
    score_trends: List[ScoreTrend] = field(default_factory=list)
    # Time of analysis (ISO-8601 text)
    analyzed_at: str = ""


# ============================================================
# Private helpers
# ============================================================

def _date_text(value: Any) -> str:
    """Return a timestamp-like value as text that is safe to slice and sort.

    ``None`` becomes an empty string, ``date``/``datetime`` objects are
    rendered with ``isoformat()``, and anything else goes through ``str()``.
    """
    if value is None:
        return ""
    if isinstance(value, (datetime, date)):
        return value.isoformat()
    return str(value)


def _normalized_score(record: Dict[str, Any]) -> float:
    """Return the record's score on a 0-100 scale.

    A missing or ``None`` ``score`` counts as 0 and a missing or ``None``
    ``total_score`` counts as 100. The divisor is floored at 1 so that a
    zero total cannot raise ``ZeroDivisionError``.
    """
    score = record.get("score")
    total = record.get("total_score")
    if score is None:
        score = 0
    if total is None:
        total = 100
    return (score / max(total, 1)) * 100


def _mean(values: List[float]) -> float:
    """Return the arithmetic mean of *values*, or 0.0 for an empty list."""
    return sum(values) / len(values) if values else 0.0


# ============================================================
# Profile analysis engine
# ============================================================

class StudentProfiler:
    """
    Student profile analysis engine.

    Features:
    1. Overall learning score calculation
    2. Multi-dimensional per-subject ability assessment
    3. Learning habit analysis
    4. Handwriting ability assessment
    5. Score trend analysis
    6. Class/grade ranking calculation
    """

    # Dimension weights used by the overall score
    WEIGHT_HOMEWORK_SCORE = 0.30  # homework score weight
    WEIGHT_EXAM_SCORE = 0.35  # exam score weight
    WEIGHT_PRACTICE = 0.15  # practice performance weight
    WEIGHT_WRITING = 0.10  # handwriting ability weight
    WEIGHT_HABIT = 0.10  # learning habit weight

    # Grading thresholds
    EXCELLENT_THRESHOLD = 90.0
    GOOD_THRESHOLD = 75.0
    PASS_THRESHOLD = 60.0

    def __init__(self):
        """Initialize the profile analysis engine."""
        logger.info("学生画像分析引擎初始化")

    async def build_profile(
        self,
        student_id: str,
        student_info: Dict[str, Any],
        period_days: int = 30,
    ) -> ComprehensiveProfile:
        """
        Build the comprehensive profile of one student.

        Args:
            student_id: Student ID.
            student_info: Basic student information; the keys ``name``,
                ``class_id``, ``grade`` and ``school_id`` are read and each
                defaults to an empty string when absent.
            period_days: Length of the analysis window in days, ending today.

        Returns:
            The assembled profile, after it has been handed to
            ``_save_profile``.
        """
        logger.info(
            "构建学生画像: %s, 分析周期=%d天", student_id, period_days
        )

        end_date = date.today()
        start_date = end_date - timedelta(days=period_days)

        # 1. Fetch raw data
        homework_data = await self._fetch_homework_data(
            student_id, start_date, end_date
        )
        exam_data = await self._fetch_exam_data(
            student_id, start_date, end_date
        )
        practice_data = await self._fetch_practice_data(
            student_id, start_date, end_date
        )
        writing_data = await self._fetch_writing_data(
            student_id, start_date, end_date
        )
        usage_data = await self._fetch_usage_data(
            student_id, start_date, end_date
        )

        # 2. Analyze each dimension
        subject_abilities = self._analyze_subject_abilities(
            homework_data, exam_data, practice_data
        )
        learning_habit = self._analyze_learning_habit(usage_data)
        writing_ability = self._analyze_writing_ability(writing_data)
        score_trends = self._analyze_score_trends(homework_data, exam_data)

        # 3. Compute the overall score
        overall_score = self._calculate_overall_score(
            subject_abilities, learning_habit, writing_ability
        )

        # 4. Compute rankings
        rank_in_class, rank_in_grade, percentile = (
            await self._calculate_rankings(
                student_id,
                student_info.get("class_id", ""),
                student_info.get("grade", ""),
                overall_score,
            )
        )

        profile = ComprehensiveProfile(
            student_id=student_id,
            student_name=student_info.get("name", ""),
            class_id=student_info.get("class_id", ""),
            grade=student_info.get("grade", ""),
            school_id=student_info.get("school_id", ""),
            overall_score=round(overall_score, 1),
            rank_in_class=rank_in_class,
            rank_in_grade=rank_in_grade,
            percentile=round(percentile, 1),
            subject_abilities=subject_abilities,
            learning_habit=learning_habit,
            writing_ability=writing_ability,
            score_trends=score_trends,
            analyzed_at=datetime.now().isoformat(),
        )

        # 5. Write to the ClickHouse profile wide table
        await self._save_profile(profile)

        logger.info(
            "画像构建完成: %s, 综合评分=%.1f, 班级排名=%d",
            student_id,
            overall_score,
            rank_in_class,
        )
        return profile

    async def _fetch_homework_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch homework score data from ClickHouse (placeholder: returns [])."""
        # query = """
        #     SELECT subject, score, total_score, submitted_at, is_on_time
        #     FROM homework_submissions
        #     WHERE student_id = %(sid)s
        #       AND submitted_at BETWEEN %(start)s AND %(end)s
        #     ORDER BY submitted_at
        # """
        # return await clickhouse_query(query, {
        #     "sid": student_id, "start": str(start), "end": str(end)
        # })
        return []

    async def _fetch_exam_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch exam score data from ClickHouse (placeholder: returns [])."""
        return []

    async def _fetch_practice_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch practice (copybook/stroke order) data (placeholder: returns [])."""
        return []

    async def _fetch_writing_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch handwriting quality score data (placeholder: returns [])."""
        return []

    async def _fetch_usage_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch application usage duration data (placeholder: returns [])."""
        return []

    def _analyze_subject_abilities(
        self,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
        practice_data: List[Dict[str, Any]],
    ) -> List[SubjectAbility]:
        """
        Assess ability per subject from homework and exam records.

        Computed dimensions:
        - overall_score: mean of the normalized homework and exam scores
        - stability: 1 / (1 + population std-dev / 10)
        - improvement_rate: mean of the later half minus mean of the earlier
          half of the subject's scores

        Args:
            homework_data: Records with ``subject``, ``score``,
                ``total_score`` and ``submitted_at``.
            exam_data: Records with ``subject``, ``score``, ``total_score``
                and ``exam_date``.
            practice_data: Currently unused; ``knowledge_coverage`` and
                ``practice_frequency`` keep their defaults.

        Returns:
            One ``SubjectAbility`` per subject, in first-seen order.
        """
        # subject -> [(timestamp text, normalized score), ...]
        by_subject: Dict[str, List[Tuple[str, float]]] = {}
        sources = (
            (homework_data, "submitted_at"),
            (exam_data, "exam_date"),
        )
        for records, date_key in sources:
            for record in records:
                subject = record.get("subject", "unknown")
                by_subject.setdefault(subject, []).append(
                    (_date_text(record.get(date_key)),
                     _normalized_score(record))
                )

        abilities: List[SubjectAbility] = []
        for subject, entries in by_subject.items():
            # Homework and exams are merged, so "earlier vs later" is only
            # meaningful in chronological order. When any record is undated
            # the order cannot be established, so input order is kept.
            if all(stamp for stamp, _ in entries):
                entries = sorted(entries, key=lambda entry: entry[0])
            scores = [score for _, score in entries]

            avg_score = _mean(scores)

            # Stability: population std-dev mapped into (0, 1]
            variance = _mean([(s - avg_score) ** 2 for s in scores])
            stability = 1.0 / (1.0 + math.sqrt(variance) / 10)

            # Improvement: later-half mean minus earlier-half mean
            mid = len(scores) // 2
            if mid > 0:
                improvement = _mean(scores[mid:]) - _mean(scores[:mid])
            else:
                improvement = 0.0

            abilities.append(SubjectAbility(
                subject=subject,
                overall_score=round(avg_score, 1),
                stability=round(stability, 3),
                improvement_rate=round(improvement, 1),
            ))
        return abilities

    def _analyze_learning_habit(
        self, usage_data: List[Dict[str, Any]]
    ) -> LearningHabit:
        """
        Analyze learning habits from usage records.

        Computed dimensions:
        - average minutes per active day
        - peak study hour (the ``start_hour`` with the most records)
        - weekly pattern (average minutes per active day, Monday..Sunday)
        - consistency score (1 minus the coefficient of variation of the
          daily totals, floored at 0)

        Args:
            usage_data: Records with ``date``, ``duration_minutes`` and
                ``start_hour``.

        Returns:
            The habit profile; all defaults when ``usage_data`` is empty.
            ``homework_timeliness`` is not computed here.
        """
        if not usage_data:
            return LearningHabit()

        # Aggregate minutes per day and session counts per starting hour
        daily_minutes: Dict[Any, float] = {}
        hourly_counts: Dict[int, int] = {}
        for record in usage_data:
            date_str = record.get("date", "")
            minutes = record.get("duration_minutes", 0)
            hour = record.get("start_hour", 0)
            daily_minutes[date_str] = daily_minutes.get(date_str, 0) + minutes
            hourly_counts[hour] = hourly_counts.get(hour, 0) + 1

        daily_totals = list(daily_minutes.values())
        avg_daily = _mean(daily_totals)

        # Ties resolve to the hour seen first
        peak_hour = max(hourly_counts, key=hourly_counts.get)

        # Regularity: the smaller the coefficient of variation, the higher
        # the score. The mean is floored at 1 to avoid dividing by ~0.
        variance = _mean([(v - avg_daily) ** 2 for v in daily_totals])
        cv = math.sqrt(variance) / max(avg_daily, 1)
        consistency = max(0.0, 1.0 - cv)

        # Weekly pattern: bucket the daily totals by weekday (0 = Monday).
        # Days whose date cannot be parsed are left out of the pattern.
        weekday_minutes: Dict[int, List[float]] = {i: [] for i in range(7)}
        for day_key, total in daily_minutes.items():
            try:
                weekday = date.fromisoformat(
                    _date_text(day_key)[:10]
                ).weekday()
            except ValueError:
                continue
            weekday_minutes[weekday].append(total)

        if any(weekday_minutes.values()):
            weekly_pattern = [
                round(_mean(weekday_minutes[i]), 1) for i in range(7)
            ]
        else:
            weekly_pattern = []

        return LearningHabit(
            avg_daily_minutes=round(avg_daily, 1),
            peak_study_hour=peak_hour,
            weekly_pattern=weekly_pattern,
            consistency_score=round(consistency, 3),
        )

    def _analyze_writing_ability(
        self, writing_data: List[Dict[str, Any]]
    ) -> WritingAbility:
        """
        Assess handwriting ability.

        Averages the stroke order, quality, speed and structure scores, and
        derives a trend by comparing the later half of the quality scores
        with the earlier half (a gap above 3 points counts as a change).
        Records are compared in the order given.

        Args:
            writing_data: Records with ``stroke_order_score``,
                ``quality_score``, ``speed`` and ``structure_score``; a
                missing key counts as 0.

        Returns:
            The assessment; all defaults when ``writing_data`` is empty.
        """
        if not writing_data:
            return WritingAbility()

        def column(key: str) -> List[float]:
            # Pull one metric out of every record
            return [item.get(key, 0) for item in writing_data]

        quality_scores = column("quality_score")

        # Trend: later half vs earlier half
        trend = "stable"
        mid = len(quality_scores) // 2
        if mid > 0:
            early_avg = _mean(quality_scores[:mid])
            recent_avg = _mean(quality_scores[mid:])
            if recent_avg - early_avg > 3:
                trend = "improving"
            elif early_avg - recent_avg > 3:
                trend = "declining"

        return WritingAbility(
            stroke_order_accuracy=round(
                _mean(column("stroke_order_score")), 1
            ),
            writing_quality=round(_mean(quality_scores), 1),
            writing_speed=round(_mean(column("speed")), 1),
            char_structure_score=round(_mean(column("structure_score")), 1),
            improvement_trend=trend,
        )

    def _analyze_score_trends(
        self,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
    ) -> List[ScoreTrend]:
        """
        Build the date-sorted score trend series.

        Args:
            homework_data: Records dated by ``submitted_at``.
            exam_data: Records dated by ``exam_date``.

        Returns:
            One ``ScoreTrend`` per record, sorted by date text. Timestamps
            may be strings, ``date``/``datetime`` objects or ``None``; an
            undated record gets an empty date and therefore sorts first.
        """
        trends: List[ScoreTrend] = []
        sources = (
            (homework_data, "submitted_at", "homework"),
            (exam_data, "exam_date", "exam"),
        )
        for records, date_key, exam_type in sources:
            for record in records:
                trends.append(ScoreTrend(
                    date=_date_text(record.get(date_key))[:10],
                    score=round(_normalized_score(record), 1),
                    subject=record.get("subject", ""),
                    exam_type=exam_type,
                ))

        # Sort by date
        trends.sort(key=lambda t: t.date)
        return trends

    def _calculate_overall_score(
        self,
        subject_abilities: List[SubjectAbility],
        learning_habit: LearningHabit,
        writing_ability: WritingAbility,
    ) -> float:
        """
        Compute the overall score on a 0-100 scale.

        Weighted formula:
            overall = homework x 0.30 + exam x 0.35 + practice x 0.15
                      + writing x 0.10 + habit x 0.10

        The homework, exam and practice terms all use the same academic
        average, because ``SubjectAbility`` carries a single merged score.

        Returns:
            The weighted score clamped to [0, 100].
        """
        # Homework/exam average across subjects
        academic_avg = _mean([a.overall_score for a in subject_abilities])

        # Handwriting score (used as-is, assumed to be on a 0-100 scale)
        writing_score = writing_ability.writing_quality

        # Habit score (consistency x 100)
        habit_score = learning_habit.consistency_score * 100

        # Weighted sum
        overall = (
            academic_avg
            * (self.WEIGHT_HOMEWORK_SCORE + self.WEIGHT_EXAM_SCORE)
            + academic_avg * self.WEIGHT_PRACTICE
            + writing_score * self.WEIGHT_WRITING
            + habit_score * self.WEIGHT_HABIT
        )
        return min(100.0, max(0.0, overall))

    async def _calculate_rankings(
        self,
        student_id: str,
        class_id: str,
        grade: str,
        score: float,
    ) -> Tuple[int, int, float]:
        """
        Compute the class rank, grade rank and grade percentile.

        Placeholder: always returns ``(0, 0, 0.0)``. The intended
        implementation, sketched below, queries the overall scores of the
        same class and grade from ClickHouse.
        """
        # Query scores of students in the same class
        # class_scores = await query_class_scores(class_id)
        # class_rank = sum(1 for s in class_scores if s > score) + 1
        # Query scores of students in the same grade
        # grade_scores = await query_grade_scores(grade)
        # grade_rank = sum(1 for s in grade_scores if s > score) + 1
        # percentile = (1 - grade_rank / max(len(grade_scores), 1)) * 100
        return 0, 0, 0.0

    async def _save_profile(self, profile: ComprehensiveProfile) -> None:
        """Write the profile to the ClickHouse profile wide table (placeholder)."""
        # clickhouse_client.execute(
        #     "INSERT INTO student_profile VALUES",
        #     [profile_to_row(profile)],
        # )
        pass