# Files
# system-design/software-copyright/03-writech-learning-analytics/analytics/student_profiler.py
# T
# 2026-03-22 15:24:40 +08:00

# 542 lines
# 17 KiB
# Python
# Raw Blame History

# This file contains ambiguous Unicode characters
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# 自然写教学数据分析与学情诊断系统软件 V1.0
# analytics/student_profiler.py - 学生画像分析引擎
import logging
import math
from typing import Any, Dict, List, Optional, Tuple
from datetime import datetime, date, timedelta
from dataclasses import dataclass, field
logger = logging.getLogger("writech.analytics.profiler")
# ============================================================
# 画像分析数据模型
# ============================================================
@dataclass
class ScoreTrend:
    """One data point of a student's score trend."""

    # Date of the scored item, kept as a string.
    date: str
    # Score value for this data point.
    score: float
    # Subject the score belongs to.
    subject: str
    # Kind of scored item: homework / exam / practice.
    exam_type: str = ""
@dataclass
class SubjectAbility:
    """Ability assessment for a single subject."""

    # Subject name.
    subject: str
    # Overall score for the subject.
    overall_score: float = 0.0
    # Knowledge-point coverage rate.
    knowledge_coverage: float = 0.0
    # Practice frequency (sessions per week).
    practice_frequency: float = 0.0
    # Rate of improvement.
    improvement_rate: float = 0.0
    # Score stability; higher means less spread between scores.
    stability: float = 0.0
@dataclass
class LearningHabit:
    """Learning-habit profile of a student."""

    # Average study time per day, in minutes.
    avg_daily_minutes: float = 0.0
    # Hour of the day in which study activity peaks.
    peak_study_hour: int = 0
    # Study duration per weekday, Monday through Sunday.
    weekly_pattern: List[float] = field(default_factory=list)
    # Regularity score of the study routine.
    consistency_score: float = 0.0
    # Share of homework submitted on time.
    homework_timeliness: float = 0.0
@dataclass
class WritingAbility:
    """Handwriting ability assessment."""

    # Stroke-order accuracy.
    stroke_order_accuracy: float = 0.0
    # Writing neatness / conformity score.
    writing_quality: float = 0.0
    # Writing speed (characters per minute).
    writing_speed: float = 0.0
    # Character-structure score.
    char_structure_score: float = 0.0
    # Progress trend label; defaults to "stable".
    improvement_trend: str = "stable"
@dataclass
class ComprehensiveProfile:
    """Comprehensive learning profile of one student."""

    # --- Identity -------------------------------------------------------
    student_id: str
    student_name: str
    class_id: str
    grade: str
    school_id: str

    # --- Composite score and rankings -----------------------------------
    overall_score: float = 0.0
    rank_in_class: int = 0
    rank_in_grade: int = 0
    percentile: float = 0.0

    # --- Per-subject abilities ------------------------------------------
    subject_abilities: List[SubjectAbility] = field(default_factory=list)

    # --- Learning habit (None until analysed) ---------------------------
    learning_habit: Optional[LearningHabit] = None

    # --- Handwriting ability (None until analysed) ----------------------
    writing_ability: Optional[WritingAbility] = None

    # --- Score trend series ---------------------------------------------
    score_trends: List[ScoreTrend] = field(default_factory=list)

    # --- Timestamp of the analysis, stored as a string ------------------
    analyzed_at: str = ""
# ============================================================
# 画像分析引擎
# ============================================================
class StudentProfiler:
    """Student profiling engine.

    Capabilities:
      1. Composite learning score
      2. Multi-dimensional per-subject ability assessment
      3. Learning-habit analysis
      4. Handwriting ability assessment
      5. Score-trend series generation
      6. Class / grade ranking (the storage query is currently a stub)

    All ``_fetch_*`` methods, ``_calculate_rankings`` and ``_save_profile``
    are placeholders in this file: they return empty/zero results and do not
    touch any storage backend yet.
    """

    # Weights of each dimension in the composite score (they sum to 1.0).
    WEIGHT_HOMEWORK_SCORE = 0.30  # homework scores
    WEIGHT_EXAM_SCORE = 0.35      # exam scores
    WEIGHT_PRACTICE = 0.15        # practice performance
    WEIGHT_WRITING = 0.10         # handwriting ability
    WEIGHT_HABIT = 0.10           # learning habits

    # Grading thresholds (not referenced inside this class).
    EXCELLENT_THRESHOLD = 90.0
    GOOD_THRESHOLD = 75.0
    PASS_THRESHOLD = 60.0

    def __init__(self):
        """Initialise the profiling engine (only emits a log line)."""
        logger.info("学生画像分析引擎初始化")

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------
    async def build_profile(
        self,
        student_id: str,
        student_info: Dict[str, Any],
        period_days: int = 30,
    ) -> "ComprehensiveProfile":
        """Build the comprehensive profile of one student.

        Args:
            student_id: Student identifier.
            student_info: Basic student information; the keys ``name``,
                ``class_id``, ``grade`` and ``school_id`` are read, each
                defaulting to an empty string when absent.
            period_days: Length of the analysis window in days, counted
                back from today.

        Returns:
            The populated ``ComprehensiveProfile``. It is also handed to
            ``_save_profile`` before being returned.
        """
        logger.info(
            "构建学生画像: %s, 分析周期=%d天", student_id, period_days
        )
        end_date = date.today()
        start_date = end_date - timedelta(days=period_days)
        window = (student_id, start_date, end_date)

        # 1. Fetch raw data. The fetches are awaited one after another;
        #    nothing here establishes that the backend tolerates concurrent
        #    queries, so they are deliberately not gathered.
        homework_data = await self._fetch_homework_data(*window)
        exam_data = await self._fetch_exam_data(*window)
        practice_data = await self._fetch_practice_data(*window)
        writing_data = await self._fetch_writing_data(*window)
        usage_data = await self._fetch_usage_data(*window)

        # 2. Analyse each dimension.
        subject_abilities = self._analyze_subject_abilities(
            homework_data, exam_data, practice_data
        )
        learning_habit = self._analyze_learning_habit(usage_data)
        writing_ability = self._analyze_writing_ability(writing_data)
        score_trends = self._analyze_score_trends(homework_data, exam_data)

        # 3. Composite score.
        overall_score = self._calculate_overall_score(
            subject_abilities, learning_habit, writing_ability
        )

        # 4. Rankings.
        class_id = student_info.get("class_id", "")
        grade = student_info.get("grade", "")
        rank_in_class, rank_in_grade, percentile = (
            await self._calculate_rankings(
                student_id, class_id, grade, overall_score
            )
        )

        profile = ComprehensiveProfile(
            student_id=student_id,
            student_name=student_info.get("name", ""),
            class_id=class_id,
            grade=grade,
            school_id=student_info.get("school_id", ""),
            overall_score=round(overall_score, 1),
            rank_in_class=rank_in_class,
            rank_in_grade=rank_in_grade,
            percentile=round(percentile, 1),
            subject_abilities=subject_abilities,
            learning_habit=learning_habit,
            writing_ability=writing_ability,
            score_trends=score_trends,
            analyzed_at=datetime.now().isoformat(),
        )

        # 5. Persist the profile (currently a no-op stub).
        await self._save_profile(profile)
        logger.info(
            "画像构建完成: %s, 综合评分=%.1f, 班级排名=%d",
            student_id, overall_score, rank_in_class,
        )
        return profile

    # ------------------------------------------------------------------
    # Data access stubs
    # ------------------------------------------------------------------
    async def _fetch_homework_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch homework score records; stub that returns an empty list.

        Intended query (not wired up yet):
            SELECT subject, score, total_score, submitted_at, is_on_time
            FROM homework_submissions
            WHERE student_id = %(sid)s
              AND submitted_at BETWEEN %(start)s AND %(end)s
            ORDER BY submitted_at
        with parameters {"sid": student_id, "start": str(start),
        "end": str(end)}.
        """
        return []

    async def _fetch_exam_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch exam score records; stub that returns an empty list."""
        return []

    async def _fetch_practice_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch practice (copybook / stroke-order) records; stub, returns []."""
        return []

    async def _fetch_writing_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch handwriting quality records; stub that returns an empty list."""
        return []

    async def _fetch_usage_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch app usage-duration records; stub that returns an empty list."""
        return []

    # ------------------------------------------------------------------
    # Pure helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _date_key(value: Any) -> str:
        """Return a string form of a date-like value for slicing and sorting.

        ``None`` becomes ``""``, ``date``/``datetime`` objects become their
        ISO-8601 string, and anything else is passed through ``str``. This
        keeps ``[:10]`` slicing safe when a driver hands back date objects
        instead of strings.
        """
        if value is None:
            return ""
        if isinstance(value, date):  # also covers datetime
            return value.isoformat()
        return str(value)

    @staticmethod
    def _normalize_score(record: Dict[str, Any]) -> float:
        """Convert a record's ``score``/``total_score`` to a 0-100 scale.

        ``total_score`` defaults to 100 and ``score`` to 0; the divisor is
        floored at 1 so a zero total cannot divide by zero.
        """
        total = record.get("total_score", 100)
        score = record.get("score", 0)
        return (score / max(total, 1)) * 100

    @staticmethod
    def _half_split_delta(values: List[float]) -> float:
        """Return mean(second half) - mean(first half) of ``values``.

        With an odd number of items the middle one counts toward the second
        half. Fewer than two items yields 0.0.
        """
        mid = len(values) // 2
        if mid == 0:
            return 0.0
        early = sum(values[:mid]) / mid
        recent = sum(values[mid:]) / (len(values) - mid)
        return recent - early

    @classmethod
    def _chronological_scores(
        cls,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
    ) -> Dict[str, List[float]]:
        """Group normalised scores by subject, ordered by record date.

        Homework records are dated by ``submitted_at`` and exam records by
        ``exam_date``. The sort is stable, so records with equal (or
        missing) dates keep their input order; records without a date sort
        first.
        """
        dated: Dict[str, List[Tuple[str, float]]] = {}
        sources = ((homework_data, "submitted_at"), (exam_data, "exam_date"))
        for records, date_field in sources:
            for record in records:
                subject = record.get("subject", "unknown")
                dated.setdefault(subject, []).append((
                    cls._date_key(record.get(date_field)),
                    cls._normalize_score(record),
                ))
        return {
            subject: [
                score for _, score in sorted(pairs, key=lambda p: p[0])
            ]
            for subject, pairs in dated.items()
        }

    @classmethod
    def _weekly_pattern(cls, daily_minutes: Dict[Any, float]) -> List[float]:
        """Average the per-day totals by weekday (Monday first).

        Keys whose first ten characters are not an ISO ``YYYY-MM-DD`` date
        are skipped. Returns seven values rounded to one decimal (0.0 for
        weekdays without data), or an empty list when no key could be
        parsed.
        """
        buckets: List[List[float]] = [[] for _ in range(7)]
        for day, minutes in daily_minutes.items():
            try:
                weekday = date.fromisoformat(cls._date_key(day)[:10]).weekday()
            except ValueError:
                continue
            buckets[weekday].append(minutes)
        if not any(buckets):
            return []
        return [
            round(sum(bucket) / len(bucket), 1) if bucket else 0.0
            for bucket in buckets
        ]

    # ------------------------------------------------------------------
    # Dimension analysers
    # ------------------------------------------------------------------
    def _analyze_subject_abilities(
        self,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
        practice_data: List[Dict[str, Any]],
    ) -> List["SubjectAbility"]:
        """Assess each subject from homework and exam scores.

        Computed per subject:
          - overall_score: mean of the normalised scores (1 decimal)
          - stability: 1 / (1 + std_dev / 10), in (0, 1] (3 decimals)
          - improvement_rate: later-half mean minus earlier-half mean of
            the scores in date order (1 decimal)

        ``knowledge_coverage`` and ``practice_frequency`` are left at their
        defaults, and ``practice_data`` is accepted but not used yet.
        """
        abilities: List[SubjectAbility] = []
        grouped = self._chronological_scores(homework_data, exam_data)
        for subject, scores in grouped.items():
            avg_score = sum(scores) / len(scores)
            # Population variance; std_dev is scaled by 10 before inverting.
            variance = sum((s - avg_score) ** 2 for s in scores) / len(scores)
            stability = 1.0 / (1.0 + math.sqrt(variance) / 10)
            abilities.append(SubjectAbility(
                subject=subject,
                overall_score=round(avg_score, 1),
                stability=round(stability, 3),
                improvement_rate=round(self._half_split_delta(scores), 1),
            ))
        return abilities

    def _analyze_learning_habit(
        self, usage_data: List[Dict[str, Any]]
    ) -> "LearningHabit":
        """Analyse learning habits from usage records.

        Each record is read for ``date``, ``duration_minutes`` and
        ``start_hour``. Produces the average minutes per active day, the
        most frequent start hour, the Monday-to-Sunday average pattern and
        a consistency score of ``max(0, 1 - cv)``, where ``cv`` is the
        coefficient of variation of the daily totals. Returns a default
        ``LearningHabit`` when there is no data. ``homework_timeliness`` is
        not computed here.
        """
        if not usage_data:
            return LearningHabit()

        daily_minutes: Dict[Any, float] = {}
        hourly_counts: Dict[int, int] = {}
        for record in usage_data:
            day = record.get("date", "")
            daily_minutes[day] = (
                daily_minutes.get(day, 0) + record.get("duration_minutes", 0)
            )
            hour = record.get("start_hour", 0)
            hourly_counts[hour] = hourly_counts.get(hour, 0) + 1

        totals = list(daily_minutes.values())
        avg_daily = sum(totals) / len(totals)

        # On ties the hour seen first wins (max keeps the first maximum).
        peak_hour = max(hourly_counts, key=hourly_counts.get)

        # Lower variation between days means a more regular routine. The
        # mean is floored at 1 so near-zero usage cannot blow up the ratio.
        variance = sum((v - avg_daily) ** 2 for v in totals) / len(totals)
        cv = math.sqrt(variance) / max(avg_daily, 1)
        consistency = max(0.0, 1.0 - cv)

        return LearningHabit(
            avg_daily_minutes=round(avg_daily, 1),
            peak_study_hour=peak_hour,
            weekly_pattern=self._weekly_pattern(daily_minutes),
            consistency_score=round(consistency, 3),
        )

    def _analyze_writing_ability(
        self, writing_data: List[Dict[str, Any]]
    ) -> "WritingAbility":
        """Assess handwriting from per-sample scores.

        Averages ``stroke_order_score``, ``quality_score``, ``speed`` and
        ``structure_score`` (missing keys count as 0). The trend compares
        the later half of the quality scores with the earlier half, in the
        order given: a gain above 3 points is "improving", a loss above 3
        points is "declining", otherwise "stable". Returns a default
        ``WritingAbility`` when there is no data.
        """
        if not writing_data:
            return WritingAbility()

        def column(key: str) -> List[float]:
            return [sample.get(key, 0) for sample in writing_data]

        def mean(values: List[float]) -> float:
            return sum(values) / len(values)

        quality_scores = column("quality_score")
        delta = self._half_split_delta(quality_scores)
        if delta > 3:
            trend = "improving"
        elif delta < -3:
            trend = "declining"
        else:
            trend = "stable"

        return WritingAbility(
            stroke_order_accuracy=round(mean(column("stroke_order_score")), 1),
            writing_quality=round(mean(quality_scores), 1),
            writing_speed=round(mean(column("speed")), 1),
            char_structure_score=round(mean(column("structure_score")), 1),
            improvement_trend=trend,
        )

    def _analyze_score_trends(
        self,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
    ) -> List["ScoreTrend"]:
        """Build the score-trend series, sorted by date.

        Each homework record (dated by ``submitted_at``) and exam record
        (dated by ``exam_date``) becomes one ``ScoreTrend`` whose date is
        the first ten characters of the date value and whose score is the
        normalised percentage rounded to one decimal.
        """
        sources = (
            (homework_data, "submitted_at", "homework"),
            (exam_data, "exam_date", "exam"),
        )
        trends: List[ScoreTrend] = [
            ScoreTrend(
                date=self._date_key(record.get(date_field, ""))[:10],
                score=round(self._normalize_score(record), 1),
                subject=record.get("subject", ""),
                exam_type=exam_type,
            )
            for records, date_field, exam_type in sources
            for record in records
        ]
        trends.sort(key=lambda t: t.date)
        return trends

    # ------------------------------------------------------------------
    # Scoring, ranking, persistence
    # ------------------------------------------------------------------
    def _calculate_overall_score(
        self,
        subject_abilities: List["SubjectAbility"],
        learning_habit: "LearningHabit",
        writing_ability: "WritingAbility",
    ) -> float:
        """Compute the composite score on a 0-100 scale.

        Formula:
            overall = academic x (0.30 + 0.35) + academic x 0.15
                      + writing x 0.10 + habit x 0.10

        ``academic`` is the mean ``overall_score`` across subjects (0 when
        there are none). No separate practice score exists yet, so the
        practice weight is applied to the academic mean as well.
        ``writing`` is ``writing_quality`` and ``habit`` is
        ``consistency_score`` x 100. The result is clamped to [0, 100].
        """
        if subject_abilities:
            academic_avg = sum(
                a.overall_score for a in subject_abilities
            ) / len(subject_abilities)
        else:
            academic_avg = 0.0

        writing_score = writing_ability.writing_quality
        # Consistency is 0-1; scale it to a percentage.
        habit_score = learning_habit.consistency_score * 100

        overall = (
            academic_avg * (self.WEIGHT_HOMEWORK_SCORE + self.WEIGHT_EXAM_SCORE)
            + academic_avg * self.WEIGHT_PRACTICE
            + writing_score * self.WEIGHT_WRITING
            + habit_score * self.WEIGHT_HABIT
        )
        return min(100.0, max(0.0, overall))

    async def _calculate_rankings(
        self,
        student_id: str,
        class_id: str,
        grade: str,
        score: float,
    ) -> Tuple[int, int, float]:
        """Return (class rank, grade rank, grade percentile); stub -> (0, 0, 0.0).

        Intended logic (not wired up yet):
            class_scores = await query_class_scores(class_id)
            class_rank = sum(1 for s in class_scores if s > score) + 1
            grade_scores = await query_grade_scores(grade)
            grade_rank = sum(1 for s in grade_scores if s > score) + 1
            percentile = (1 - grade_rank / max(len(grade_scores), 1)) * 100
        """
        return 0, 0, 0.0

    async def _save_profile(self, profile: "ComprehensiveProfile") -> None:
        """Persist the profile; currently a no-op stub.

        Intended call (not wired up yet):
            clickhouse_client.execute(
                "INSERT INTO student_profile VALUES",
                [profile_to_row(profile)],
            )
        """
        pass