# Listing metadata (from the code viewer): 542 lines, 17 KiB, Python
# 自然写教学数据分析与学情诊断系统软件 V1.0
|
||
# analytics/student_profiler.py - 学生画像分析引擎
|
||
|
||
import logging
|
||
import math
|
||
from typing import Any, Dict, List, Optional, Tuple
|
||
from datetime import datetime, date, timedelta
|
||
from dataclasses import dataclass, field
|
||
|
||
logger = logging.getLogger("writech.analytics.profiler")
|
||
|
||
|
||
# ============================================================
|
||
# 画像分析数据模型
|
||
# ============================================================
|
||
|
||
@dataclass
class ScoreTrend:
    """One data point on a student's score timeline."""

    # Date label of the data point (string; the producer in this file stores
    # the first 10 characters of the record timestamp).
    date: str
    # Score value for this point.
    score: float
    # Subject the score belongs to.
    subject: str
    # Origin of the score: homework / exam / practice.
    exam_type: str = ""
|
||
|
||
|
||
@dataclass
class SubjectAbility:
    """Ability assessment of a student for a single subject."""

    # Subject name.
    subject: str
    # Aggregate score for the subject.
    overall_score: float = 0.0
    # Knowledge-point coverage ratio.
    knowledge_coverage: float = 0.0
    # Practice frequency (sessions per week).
    practice_frequency: float = 0.0
    # Rate of improvement.
    improvement_rate: float = 0.0
    # Stability indicator (derived from the spread of the scores).
    stability: float = 0.0
|
||
|
||
|
||
@dataclass
class LearningHabit:
    """Profile of a student's study habits."""

    # Average study time per day, in minutes.
    avg_daily_minutes: float = 0.0
    # Hour of day at which study activity peaks.
    peak_study_hour: int = 0
    # Study duration per weekday, Monday through Sunday.
    weekly_pattern: List[float] = field(default_factory=list)
    # Regularity score of the study routine.
    consistency_score: float = 0.0
    # Ratio of homework submitted on time.
    homework_timeliness: float = 0.0
|
||
|
||
|
||
@dataclass
class WritingAbility:
    """Assessment of a student's handwriting ability."""

    # Stroke-order correctness rate.
    stroke_order_accuracy: float = 0.0
    # Handwriting neatness / conformity score.
    writing_quality: float = 0.0
    # Writing speed (characters per minute).
    writing_speed: float = 0.0
    # Character-structure score.
    char_structure_score: float = 0.0
    # Direction of progress; defaults to "stable".
    improvement_trend: str = "stable"
|
||
|
||
|
||
@dataclass
class ComprehensiveProfile:
    """Comprehensive learning-status profile of one student."""

    # --- identity -------------------------------------------------------
    student_id: str
    student_name: str
    class_id: str
    grade: str
    school_id: str

    # --- overall score and ranking --------------------------------------
    overall_score: float = 0.0
    rank_in_class: int = 0
    rank_in_grade: int = 0
    percentile: float = 0.0

    # --- per-subject abilities ------------------------------------------
    subject_abilities: List[SubjectAbility] = field(default_factory=list)

    # --- study habits ---------------------------------------------------
    learning_habit: Optional[LearningHabit] = None

    # --- handwriting ability --------------------------------------------
    writing_ability: Optional[WritingAbility] = None

    # --- score timeline -------------------------------------------------
    score_trends: List[ScoreTrend] = field(default_factory=list)

    # --- time at which the analysis was produced ------------------------
    analyzed_at: str = ""
|
||
|
||
|
||
# ============================================================
|
||
# 画像分析引擎
|
||
# ============================================================
|
||
|
||
class StudentProfiler:
    """
    Student profile analysis engine.

    Designed capabilities:
        1. Overall learning-status score
        2. Multi-dimensional ability assessment per subject
        3. Study-habit analysis
        4. Handwriting ability assessment
        5. Score trend analysis and prediction
        6. Class / grade ranking

    NOTE(review): in this file the data-access helpers (``_fetch_*``),
    ``_calculate_rankings`` and ``_save_profile`` are placeholders that
    return empty / zero results; the real ClickHouse calls are only
    sketched in comments.
    """

    # Weights of each dimension in the overall score (they sum to 1.0).
    WEIGHT_HOMEWORK_SCORE = 0.30  # homework results
    WEIGHT_EXAM_SCORE = 0.35      # exam results
    WEIGHT_PRACTICE = 0.15        # practice performance
    WEIGHT_WRITING = 0.10         # handwriting ability
    WEIGHT_HABIT = 0.10           # study habits

    # Grading thresholds (percent scale).
    EXCELLENT_THRESHOLD = 90.0
    GOOD_THRESHOLD = 75.0
    PASS_THRESHOLD = 60.0

    def __init__(self):
        """Initialise the profiling engine (only logs a start-up message)."""
        logger.info("学生画像分析引擎初始化")

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------

    async def build_profile(
        self,
        student_id: str,
        student_info: Dict[str, Any],
        period_days: int = 30,
    ) -> ComprehensiveProfile:
        """
        Build the comprehensive profile of one student.

        Args:
            student_id: Student identifier.
            student_info: Basic student information; the keys ``name``,
                ``class_id``, ``grade`` and ``school_id`` are read, each
                defaulting to an empty string when absent.
            period_days: Length of the analysis window in days, counted
                back from today.

        Returns:
            The assembled profile (also handed to ``_save_profile``).
        """
        logger.info(
            "构建学生画像: %s, 分析周期=%d天", student_id, period_days
        )

        end_date = date.today()
        start_date = end_date - timedelta(days=period_days)

        # 1. Load the raw data for the analysis window.
        homework_data = await self._fetch_homework_data(
            student_id, start_date, end_date
        )
        exam_data = await self._fetch_exam_data(
            student_id, start_date, end_date
        )
        practice_data = await self._fetch_practice_data(
            student_id, start_date, end_date
        )
        writing_data = await self._fetch_writing_data(
            student_id, start_date, end_date
        )
        usage_data = await self._fetch_usage_data(
            student_id, start_date, end_date
        )

        # 2. Analyse each dimension.
        subject_abilities = self._analyze_subject_abilities(
            homework_data, exam_data, practice_data
        )
        learning_habit = self._analyze_learning_habit(usage_data)
        writing_ability = self._analyze_writing_ability(writing_data)
        score_trends = self._analyze_score_trends(homework_data, exam_data)

        # 3. Combine the dimensions into one score.
        overall_score = self._calculate_overall_score(
            subject_abilities, learning_habit, writing_ability
        )

        # 4. Rank the student within class and grade.
        rank_in_class, rank_in_grade, percentile = (
            await self._calculate_rankings(
                student_id,
                student_info.get("class_id", ""),
                student_info.get("grade", ""),
                overall_score,
            )
        )

        profile = ComprehensiveProfile(
            student_id=student_id,
            student_name=student_info.get("name", ""),
            class_id=student_info.get("class_id", ""),
            grade=student_info.get("grade", ""),
            school_id=student_info.get("school_id", ""),
            overall_score=round(overall_score, 1),
            rank_in_class=rank_in_class,
            rank_in_grade=rank_in_grade,
            percentile=round(percentile, 1),
            subject_abilities=subject_abilities,
            learning_habit=learning_habit,
            writing_ability=writing_ability,
            score_trends=score_trends,
            analyzed_at=datetime.now().isoformat(),
        )

        # 5. Persist the profile (intended target: ClickHouse wide table).
        await self._save_profile(profile)

        logger.info(
            "画像构建完成: %s, 综合评分=%.1f, 班级排名=%d",
            student_id, overall_score, rank_in_class,
        )

        return profile

    # ------------------------------------------------------------------
    # Data access (placeholders)
    # ------------------------------------------------------------------

    async def _fetch_homework_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """
        Load homework score records for the window.

        Placeholder: currently always returns an empty list. The intended
        ClickHouse query is sketched below.
        """
        # query = """
        #     SELECT subject, score, total_score, submitted_at, is_on_time
        #     FROM homework_submissions
        #     WHERE student_id = %(sid)s
        #       AND submitted_at BETWEEN %(start)s AND %(end)s
        #     ORDER BY submitted_at
        # """
        # return await clickhouse_query(query, {
        #     "sid": student_id, "start": str(start), "end": str(end)
        # })
        return []

    async def _fetch_exam_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Load exam score records (placeholder: returns an empty list)."""
        return []

    async def _fetch_practice_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """
        Load practice (copybook / stroke-order) records.

        Placeholder: returns an empty list.
        """
        return []

    async def _fetch_writing_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Load handwriting quality records (placeholder: returns [])."""
        return []

    async def _fetch_usage_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Load application usage records (placeholder: returns [])."""
        return []

    # ------------------------------------------------------------------
    # Shared pure helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _normalize_score(record: Dict[str, Any]) -> float:
        """
        Convert a record's ``score`` / ``total_score`` to a percent value.

        A missing or ``None`` ``total_score`` counts as 100 and a missing or
        ``None`` ``score`` as 0. The divisor is floored at 1 so a zero total
        cannot cause a division by zero.
        """
        total = record.get("total_score")
        if total is None:
            total = 100
        score = record.get("score")
        if score is None:
            score = 0
        return (score / max(total, 1)) * 100

    @staticmethod
    def _timestamp_text(value: Any) -> str:
        """
        Render a timestamp-like value as sortable text.

        ``None`` becomes ``""``; ``date`` / ``datetime`` objects use their
        ISO form; anything else is passed through ``str``.
        """
        if value is None:
            return ""
        if isinstance(value, date):  # datetime is a date subclass
            return value.isoformat()
        return str(value)

    @classmethod
    def _date_key(cls, value: Any) -> str:
        """Return the first 10 characters (``YYYY-MM-DD``) of a timestamp."""
        return cls._timestamp_text(value)[:10]

    @staticmethod
    def _mean(values: List[float]) -> float:
        """Arithmetic mean; 0.0 for an empty list."""
        return sum(values) / len(values) if values else 0.0

    @classmethod
    def _half_split_delta(cls, values: List[float]) -> float:
        """
        Mean of the later half minus mean of the earlier half.

        The split point is ``len(values) // 2``; with fewer than two values
        there is no earlier half and the result is 0.0.
        """
        mid = len(values) // 2
        if mid == 0:
            return 0.0
        return cls._mean(values[mid:]) - cls._mean(values[:mid])

    @classmethod
    def _collect_subject_scores(
        cls,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
    ) -> Dict[str, List[float]]:
        """
        Group normalised scores by subject in chronological order.

        Homework is dated by ``submitted_at`` and exams by ``exam_date``.
        The sort is stable, so records with equal or missing timestamps keep
        their input order (homework first, then exams); undated records sort
        before dated ones.
        """
        dated: Dict[str, List[Tuple[str, float]]] = {}
        sources = ((homework_data, "submitted_at"), (exam_data, "exam_date"))
        for records, date_field in sources:
            for record in records:
                subject = record.get("subject", "unknown")
                dated.setdefault(subject, []).append((
                    cls._timestamp_text(record.get(date_field)),
                    cls._normalize_score(record),
                ))
        return {
            subject: [
                score
                for _, score in sorted(entries, key=lambda entry: entry[0])
            ]
            for subject, entries in dated.items()
        }

    @staticmethod
    def _weekly_pattern(daily_minutes: Dict[Any, float]) -> List[float]:
        """
        Average daily study minutes per weekday, Monday through Sunday.

        Keys are calendar days (``date`` objects or ISO ``YYYY-MM-DD...``
        strings); keys that cannot be parsed are skipped. Weekdays without
        any data get 0.0.
        """
        buckets: List[List[float]] = [[] for _ in range(7)]
        for day, minutes in daily_minutes.items():
            if isinstance(day, date):
                weekday = day.weekday()
            else:
                try:
                    weekday = date.fromisoformat(str(day)[:10]).weekday()
                except ValueError:
                    continue
            buckets[weekday].append(minutes)
        return [
            round(sum(bucket) / len(bucket), 1) if bucket else 0.0
            for bucket in buckets
        ]

    # ------------------------------------------------------------------
    # Dimension analysis
    # ------------------------------------------------------------------

    def _analyze_subject_abilities(
        self,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
        practice_data: List[Dict[str, Any]],
    ) -> List[SubjectAbility]:
        """
        Assess each subject from homework and exam scores.

        Computed per subject:
            - overall_score: mean of the normalised scores
            - stability: 1 / (1 + std_dev / 10), in the range (0, 1]
            - improvement_rate: mean of the later half of the scores minus
              mean of the earlier half, in chronological order

        ``practice_data`` is accepted but not used yet, so
        ``knowledge_coverage`` and ``practice_frequency`` keep their
        defaults.
        """
        abilities: List[SubjectAbility] = []
        per_subject = self._collect_subject_scores(homework_data, exam_data)

        for subject, scores in per_subject.items():
            avg_score = self._mean(scores)
            # Population variance (divides by N, not N - 1).
            variance = sum((s - avg_score) ** 2 for s in scores) / len(scores)
            stability = 1.0 / (1.0 + math.sqrt(variance) / 10)

            abilities.append(SubjectAbility(
                subject=subject,
                overall_score=round(avg_score, 1),
                stability=round(stability, 3),
                improvement_rate=round(self._half_split_delta(scores), 1),
            ))

        return abilities

    def _analyze_learning_habit(
        self, usage_data: List[Dict[str, Any]]
    ) -> LearningHabit:
        """
        Analyse study habits from usage records.

        Reads ``date``, ``duration_minutes`` and ``start_hour`` from each
        record and derives:
            - avg_daily_minutes: mean of the per-day totals
            - peak_study_hour: the start hour with the most records
            - weekly_pattern: average daily minutes, Monday through Sunday
            - consistency_score: ``max(0, 1 - cv)``, where ``cv`` is the
              coefficient of variation of the per-day totals

        ``homework_timeliness`` is not derivable from usage data and keeps
        its default. Returns a default ``LearningHabit`` for empty input.
        """
        if not usage_data:
            return LearningHabit()

        daily_minutes: Dict[Any, float] = {}
        hourly_counts: Dict[int, int] = {}

        for record in usage_data:
            day = record.get("date", "")
            minutes = record.get("duration_minutes", 0)
            hour = record.get("start_hour", 0)

            daily_minutes[day] = daily_minutes.get(day, 0) + minutes
            hourly_counts[hour] = hourly_counts.get(hour, 0) + 1

        # usage_data is non-empty here, so there is at least one day.
        day_totals = list(daily_minutes.values())
        avg_daily = sum(day_totals) / len(day_totals)

        peak_hour = max(hourly_counts, key=hourly_counts.get, default=0)

        # Lower coefficient of variation means a steadier routine.
        variance = sum((v - avg_daily) ** 2 for v in day_totals) / len(
            day_totals
        )
        # The mean is floored at 1 to avoid dividing by zero.
        cv = math.sqrt(variance) / max(avg_daily, 1)
        consistency = max(0.0, 1.0 - cv)

        return LearningHabit(
            avg_daily_minutes=round(avg_daily, 1),
            peak_study_hour=peak_hour,
            weekly_pattern=self._weekly_pattern(daily_minutes),
            consistency_score=round(consistency, 3),
        )

    def _analyze_writing_ability(
        self, writing_data: List[Dict[str, Any]]
    ) -> WritingAbility:
        """
        Assess handwriting ability.

        Averages ``stroke_order_score``, ``quality_score``, ``speed`` and
        ``structure_score`` over all records (missing keys count as 0).
        The trend compares the mean ``quality_score`` of the later half of
        the records with the earlier half; a gap of more than 3 points
        marks "improving" or "declining", otherwise "stable".

        NOTE(review): the trend assumes ``writing_data`` arrives in
        chronological order — confirm against the data source.
        """
        if not writing_data:
            return WritingAbility()

        def column(key: str) -> List[float]:
            return [entry.get(key, 0) for entry in writing_data]

        quality_scores = column("quality_score")

        delta = self._half_split_delta(quality_scores)
        if delta > 3:
            trend = "improving"
        elif -delta > 3:
            trend = "declining"
        else:
            trend = "stable"

        return WritingAbility(
            stroke_order_accuracy=round(
                self._mean(column("stroke_order_score")), 1
            ),
            writing_quality=round(self._mean(quality_scores), 1),
            writing_speed=round(self._mean(column("speed")), 1),
            char_structure_score=round(
                self._mean(column("structure_score")), 1
            ),
            improvement_trend=trend,
        )

    def _analyze_score_trends(
        self,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
    ) -> List[ScoreTrend]:
        """
        Build the score timeline from homework and exam records.

        Each record becomes one ``ScoreTrend`` with its normalised score
        rounded to one decimal. The date is the first 10 characters of
        ``submitted_at`` (homework) or ``exam_date`` (exams); ``None`` and
        ``date`` / ``datetime`` values are accepted as well as strings.
        The result is sorted by that date string.
        """
        sources = (
            (homework_data, "submitted_at", "homework"),
            (exam_data, "exam_date", "exam"),
        )
        trends: List[ScoreTrend] = [
            ScoreTrend(
                date=self._date_key(record.get(date_field)),
                score=round(self._normalize_score(record), 1),
                subject=record.get("subject", ""),
                exam_type=kind,
            )
            for records, date_field, kind in sources
            for record in records
        ]

        trends.sort(key=lambda trend: trend.date)
        return trends

    # ------------------------------------------------------------------
    # Scoring, ranking, persistence
    # ------------------------------------------------------------------

    def _calculate_overall_score(
        self,
        subject_abilities: List[SubjectAbility],
        learning_habit: LearningHabit,
        writing_ability: WritingAbility,
    ) -> float:
        """
        Compute the overall score on a 0-100 scale.

        Weighted formula:
            overall = homework x 0.30 + exam x 0.35 + practice x 0.15
                      + writing x 0.10 + habit x 0.10

        Homework, exam and practice all use the same value here: the mean
        ``overall_score`` across subjects (0.0 when there are none).
        Writing uses ``writing_quality``; habit uses
        ``consistency_score * 100``. The result is clamped to [0, 100].
        """
        academic_avg = (
            sum(a.overall_score for a in subject_abilities)
            / len(subject_abilities)
            if subject_abilities
            else 0.0
        )

        writing_score = writing_ability.writing_quality
        habit_score = learning_habit.consistency_score * 100

        overall = (
            academic_avg
            * (self.WEIGHT_HOMEWORK_SCORE + self.WEIGHT_EXAM_SCORE)
            + academic_avg * self.WEIGHT_PRACTICE
            + writing_score * self.WEIGHT_WRITING
            + habit_score * self.WEIGHT_HABIT
        )

        return min(100.0, max(0.0, overall))

    async def _calculate_rankings(
        self,
        student_id: str,
        class_id: str,
        grade: str,
        score: float,
    ) -> Tuple[int, int, float]:
        """
        Compute class rank, grade rank and grade percentile.

        Placeholder: currently always returns ``(0, 0, 0.0)``. The intended
        computation, based on the scores of classmates and grade peers, is
        sketched below.
        """
        # Scores of students in the same class:
        # class_scores = await query_class_scores(class_id)
        # class_rank = sum(1 for s in class_scores if s > score) + 1

        # Scores of students in the same grade:
        # grade_scores = await query_grade_scores(grade)
        # grade_rank = sum(1 for s in grade_scores if s > score) + 1
        # percentile = (1 - grade_rank / max(len(grade_scores), 1)) * 100

        return 0, 0, 0.0

    async def _save_profile(self, profile: ComprehensiveProfile) -> None:
        """
        Persist the profile.

        Placeholder: does nothing yet. The intended ClickHouse insert is
        sketched below.
        """
        # clickhouse_client.execute(
        #     "INSERT INTO student_profile VALUES",
        #     [profile_to_row(profile)],
        # )
        pass
|