software copyright
This commit is contained in:
@@ -0,0 +1,541 @@
|
||||
# 自然写教学数据分析与学情诊断系统软件 V1.0
|
||||
# analytics/student_profiler.py - 学生画像分析引擎
|
||||
|
||||
import logging
|
||||
import math
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from datetime import datetime, date, timedelta
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
logger = logging.getLogger("writech.analytics.profiler")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 画像分析数据模型
|
||||
# ============================================================
|
||||
|
||||
@dataclass
class ScoreTrend:
    """One data point on a student's score-trend timeline.

    Attributes:
        date: Day the score belongs to, stored as text.
        score: Score value of the data point.
        subject: Subject the score was obtained in.
        exam_type: Origin of the score: homework / exam / practice.
            Defaults to an empty string.
    """

    date: str
    score: float
    subject: str
    exam_type: str = ""
|
||||
|
||||
|
||||
@dataclass
class SubjectAbility:
    """Ability assessment for a single subject.

    Attributes:
        subject: Name of the assessed subject.
        overall_score: Overall score for the subject.
        knowledge_coverage: Knowledge-point coverage ratio.
        practice_frequency: Practice frequency, in sessions per week.
        improvement_rate: Rate of improvement.
        stability: Score stability; a larger value means the scores are
            less spread out.
    """

    subject: str
    overall_score: float = 0.0
    knowledge_coverage: float = 0.0
    practice_frequency: float = 0.0
    improvement_rate: float = 0.0
    stability: float = 0.0
|
||||
|
||||
|
||||
@dataclass
class LearningHabit:
    """Profile of a student's learning habits.

    Attributes:
        avg_daily_minutes: Average study time per day, in minutes.
        peak_study_hour: Hour of the day at which study peaks.
        weekly_pattern: Study durations for Monday through Sunday.
            Every instance gets its own fresh list.
        consistency_score: Score describing how regular the study
            routine is.
        homework_timeliness: Rate of homework handed in on time.
    """

    avg_daily_minutes: float = 0.0
    peak_study_hour: int = 0
    weekly_pattern: List[float] = field(default_factory=list)
    consistency_score: float = 0.0
    homework_timeliness: float = 0.0
|
||||
|
||||
|
||||
@dataclass
class WritingAbility:
    """Assessment of a student's handwriting ability.

    Attributes:
        stroke_order_accuracy: Rate of correct stroke order.
        writing_quality: How well the writing conforms to the standard.
        writing_speed: Writing speed, in characters per minute.
        char_structure_score: Score for character structure.
        improvement_trend: Direction of progress; defaults to "stable".
    """

    stroke_order_accuracy: float = 0.0
    writing_quality: float = 0.0
    writing_speed: float = 0.0
    char_structure_score: float = 0.0
    improvement_trend: str = "stable"
|
||||
|
||||
|
||||
@dataclass
class ComprehensiveProfile:
    """Comprehensive learning-status profile of one student.

    Attributes:
        student_id: Identifier of the student.
        student_name: Name of the student.
        class_id: Identifier of the student's class.
        grade: Grade the student is in.
        school_id: Identifier of the student's school.
        overall_score: Overall score.
        rank_in_class: Rank within the class.
        rank_in_grade: Rank within the grade.
        percentile: Percentile of the student.
        subject_abilities: Per-subject ability assessments.
        learning_habit: Learning-habit profile, or None when absent.
        writing_ability: Handwriting assessment, or None when absent.
        score_trends: Score-trend data points.
        analyzed_at: Time of the analysis, stored as text.
    """

    # Identity
    student_id: str
    student_name: str
    class_id: str
    grade: str
    school_id: str

    # Overall scoring and ranking
    overall_score: float = 0.0
    rank_in_class: int = 0
    rank_in_grade: int = 0
    percentile: float = 0.0

    # Per-dimension results
    subject_abilities: List[SubjectAbility] = field(default_factory=list)
    learning_habit: Optional[LearningHabit] = None
    writing_ability: Optional[WritingAbility] = None
    score_trends: List[ScoreTrend] = field(default_factory=list)

    # Analysis timestamp
    analyzed_at: str = ""
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 画像分析引擎
|
||||
# ============================================================
|
||||
|
||||
class StudentProfiler:
    """Student profile analysis engine.

    What the engine computes:
        1. An overall learning-status score (weighted, 100-point scale).
        2. A per-subject ability assessment (average, stability,
           improvement rate).
        3. A learning-habit analysis based on application usage records.
        4. A handwriting ability assessment.
        5. A date-ordered score-trend timeline.
        6. Class / grade rankings (currently a placeholder).

    The ``_fetch_*`` methods, ``_calculate_rankings`` and
    ``_save_profile`` are placeholders: the data-store calls are only
    sketched in comments, so they return empty / zero values for now.
    """

    # Weight of each dimension in the overall score (the weights sum to 1.0).
    WEIGHT_HOMEWORK_SCORE = 0.30  # homework scores
    WEIGHT_EXAM_SCORE = 0.35  # exam scores
    WEIGHT_PRACTICE = 0.15  # practice performance
    WEIGHT_WRITING = 0.10  # handwriting ability
    WEIGHT_HABIT = 0.10  # learning habits

    # Grading thresholds on the 100-point scale.
    EXCELLENT_THRESHOLD = 90.0
    GOOD_THRESHOLD = 75.0
    PASS_THRESHOLD = 60.0

    def __init__(self):
        """Initialise the profile analysis engine."""
        logger.info("学生画像分析引擎初始化")

    async def build_profile(
        self,
        student_id: str,
        student_info: Dict[str, Any],
        period_days: int = 30,
    ) -> ComprehensiveProfile:
        """Build the comprehensive profile of one student.

        Args:
            student_id: Identifier of the student.
            student_info: Basic student information; the keys ``name``,
                ``class_id``, ``grade`` and ``school_id`` are read, each
                falling back to an empty string.
            period_days: Length of the analysis window in days, counted
                back from today.

        Returns:
            The assembled profile, after it was handed to
            ``_save_profile``.
        """
        logger.info(
            "构建学生画像: %s, 分析周期=%d天", student_id, period_days
        )

        end_date = date.today()
        start_date = end_date - timedelta(days=period_days)

        # 1. Fetch the raw data for the analysis window.
        homework_data = await self._fetch_homework_data(
            student_id, start_date, end_date
        )
        exam_data = await self._fetch_exam_data(
            student_id, start_date, end_date
        )
        practice_data = await self._fetch_practice_data(
            student_id, start_date, end_date
        )
        writing_data = await self._fetch_writing_data(
            student_id, start_date, end_date
        )
        usage_data = await self._fetch_usage_data(
            student_id, start_date, end_date
        )

        # 2. Analyse every dimension.
        subject_abilities = self._analyze_subject_abilities(
            homework_data, exam_data, practice_data
        )
        learning_habit = self._analyze_learning_habit(usage_data)
        writing_ability = self._analyze_writing_ability(writing_data)
        score_trends = self._analyze_score_trends(homework_data, exam_data)

        # 3. Combine the dimensions into the overall score.
        overall_score = self._calculate_overall_score(
            subject_abilities, learning_habit, writing_ability
        )

        # 4. Rank the student within class and grade.
        class_id = student_info.get("class_id", "")
        grade = student_info.get("grade", "")
        rank_in_class, rank_in_grade, percentile = (
            await self._calculate_rankings(
                student_id, class_id, grade, overall_score
            )
        )

        profile = ComprehensiveProfile(
            student_id=student_id,
            student_name=student_info.get("name", ""),
            class_id=class_id,
            grade=grade,
            school_id=student_info.get("school_id", ""),
            overall_score=round(overall_score, 1),
            rank_in_class=rank_in_class,
            rank_in_grade=rank_in_grade,
            percentile=round(percentile, 1),
            subject_abilities=subject_abilities,
            learning_habit=learning_habit,
            writing_ability=writing_ability,
            score_trends=score_trends,
            analyzed_at=datetime.now().isoformat(),
        )

        # 5. Persist the profile (ClickHouse wide table, see _save_profile).
        await self._save_profile(profile)

        logger.info(
            "画像构建完成: %s, 综合评分=%.1f, 班级排名=%d",
            student_id, overall_score, rank_in_class,
        )

        return profile

    # ------------------------------------------------------------------
    # Data access (placeholders)
    # ------------------------------------------------------------------

    async def _fetch_homework_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch homework score records from ClickHouse.

        Placeholder: the intended query is sketched below; an empty list
        is returned until it is wired up.
        """
        # query = """
        #     SELECT subject, score, total_score, submitted_at, is_on_time
        #     FROM homework_submissions
        #     WHERE student_id = %(sid)s
        #       AND submitted_at BETWEEN %(start)s AND %(end)s
        #     ORDER BY submitted_at
        # """
        # return await clickhouse_query(query, {
        #     "sid": student_id, "start": str(start), "end": str(end)
        # })
        return []

    async def _fetch_exam_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch exam score records from ClickHouse (placeholder: returns [])."""
        return []

    async def _fetch_practice_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch practice (copybook / stroke-order) records (placeholder: returns [])."""
        return []

    async def _fetch_writing_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch handwriting quality score records (placeholder: returns [])."""
        return []

    async def _fetch_usage_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch application usage-duration records (placeholder: returns [])."""
        return []

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _normalize_score(record: Dict[str, Any]) -> float:
        """Return the record's score as a percentage of its total score.

        A missing ``total_score`` counts as 100 and a missing ``score``
        as 0. Totals below 1 are treated as 1 so the division can never
        be by zero.
        """
        total = record.get("total_score", 100)
        score = record.get("score", 0)
        return (score / max(total, 1)) * 100

    @staticmethod
    def _timestamp_text(value: Any) -> str:
        """Render a date-like field as text; ``None`` becomes ``""``.

        ``str()`` of ``date`` / ``datetime`` objects starts with
        ``YYYY-MM-DD``, so the result can be sliced and sorted the same
        way as an ISO-formatted string.
        """
        return "" if value is None else str(value)

    @classmethod
    def _scores_by_subject(
        cls,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
    ) -> Dict[str, List[float]]:
        """Group normalised scores by subject in chronological order.

        Homework is dated by ``submitted_at`` and exams by ``exam_date``.
        The sort is stable, so records with equal (or missing) dates keep
        their input order; undated records sort first.
        """
        dated: Dict[str, List[Tuple[str, float]]] = {}
        sources = ((homework_data, "submitted_at"), (exam_data, "exam_date"))
        for records, date_key in sources:
            for record in records:
                subject = record.get("subject", "unknown")
                stamp = cls._timestamp_text(record.get(date_key))
                dated.setdefault(subject, []).append(
                    (stamp, cls._normalize_score(record))
                )

        grouped: Dict[str, List[float]] = {}
        for subject, pairs in dated.items():
            # Sort on the timestamp only; sorting whole tuples would
            # reorder same-day records by score.
            pairs.sort(key=lambda pair: pair[0])
            grouped[subject] = [score for _, score in pairs]
        return grouped

    @staticmethod
    def _score_statistics(scores: List[float]) -> Tuple[float, float, float]:
        """Return ``(average, stability, improvement)`` for a score series.

        ``scores`` must be non-empty and ordered oldest first.

        * stability is ``1 / (1 + std_dev / 10)`` using the population
          standard deviation, which maps it into (0, 1].
        * improvement is the mean of the later half minus the mean of
          the earlier half; it is 0.0 for a single score.
        """
        count = len(scores)
        average = sum(scores) / count

        variance = sum((s - average) ** 2 for s in scores) / count
        stability = 1.0 / (1.0 + math.sqrt(variance) / 10)

        mid = count // 2
        if mid == 0:
            return average, stability, 0.0

        earlier_avg = sum(scores[:mid]) / mid
        later_avg = sum(scores[mid:]) / (count - mid)
        return average, stability, later_avg - earlier_avg

    @staticmethod
    def _consistency_score(daily_totals: List[float]) -> float:
        """Score study regularity from per-day totals (non-empty list).

        Computed as ``1 - cv`` floored at 0, where ``cv`` is the
        coefficient of variation. The mean is floored at 1 in the
        denominator so near-zero means cannot blow the ratio up.
        """
        mean_val = sum(daily_totals) / len(daily_totals)
        variance = sum(
            (v - mean_val) ** 2 for v in daily_totals
        ) / len(daily_totals)
        cv = math.sqrt(variance) / max(mean_val, 1)
        return max(0.0, 1.0 - cv)

    @staticmethod
    def _weekly_pattern(daily_minutes: Dict[Any, float]) -> List[float]:
        """Average the per-day totals by weekday (Monday first).

        Keys may be ``date`` / ``datetime`` objects or text starting with
        an ISO ``YYYY-MM-DD`` date; other keys are skipped. Returns seven
        values rounded to one decimal, or ``[]`` when no key could be
        interpreted as a date.
        """
        buckets: List[List[float]] = [[] for _ in range(7)]
        for day, minutes in daily_minutes.items():
            if isinstance(day, date):
                weekday = day.weekday()
            else:
                try:
                    weekday = date.fromisoformat(str(day)[:10]).weekday()
                except ValueError:
                    continue
            buckets[weekday].append(minutes)

        if not any(buckets):
            return []
        return [
            round(sum(bucket) / len(bucket), 1) if bucket else 0.0
            for bucket in buckets
        ]

    @staticmethod
    def _trend_label(scores: List[float]) -> str:
        """Classify a score series as improving / declining / stable.

        Compares the mean of the later half with the mean of the earlier
        half; a difference of more than 3 points in either direction
        changes the label away from "stable".
        """
        mid = len(scores) // 2
        if mid == 0:
            return "stable"

        early_avg = sum(scores[:mid]) / mid
        recent_avg = sum(scores[mid:]) / (len(scores) - mid)
        if recent_avg - early_avg > 3:
            return "improving"
        if early_avg - recent_avg > 3:
            return "declining"
        return "stable"

    # ------------------------------------------------------------------
    # Dimension analysis
    # ------------------------------------------------------------------

    def _analyze_subject_abilities(
        self,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
        practice_data: List[Dict[str, Any]],
    ) -> List[SubjectAbility]:
        """Assess ability per subject from homework and exam scores.

        For every subject the following are filled in:
            - overall_score: mean normalised score (one decimal)
            - stability: ``1 / (1 + std_dev / 10)`` (three decimals)
            - improvement_rate: later-half mean minus earlier-half mean,
              taken over the scores in chronological order (one decimal)

        ``practice_data`` is accepted but not used yet, so
        ``knowledge_coverage`` and ``practice_frequency`` keep their
        defaults.

        Args:
            homework_data: Homework records (``subject``, ``score``,
                ``total_score``, ``submitted_at``).
            exam_data: Exam records (``subject``, ``score``,
                ``total_score``, ``exam_date``).
            practice_data: Practice records (currently unused).

        Returns:
            One ``SubjectAbility`` per subject, in order of first
            appearance (homework before exams).
        """
        abilities: List[SubjectAbility] = []
        grouped = self._scores_by_subject(homework_data, exam_data)
        for subject, scores in grouped.items():
            average, stability, improvement = self._score_statistics(scores)
            abilities.append(SubjectAbility(
                subject=subject,
                overall_score=round(average, 1),
                stability=round(stability, 3),
                improvement_rate=round(improvement, 1),
            ))
        return abilities

    def _analyze_learning_habit(
        self, usage_data: List[Dict[str, Any]]
    ) -> LearningHabit:
        """Analyse learning habits from usage records.

        Computed dimensions:
            - average minutes per active day
            - peak study hour (the ``start_hour`` with the most records)
            - weekly pattern (average minutes per weekday, Monday first)
            - consistency score (see ``_consistency_score``)

        ``homework_timeliness`` is not derived here and keeps its default.

        Args:
            usage_data: Records with ``date``, ``duration_minutes`` and
                ``start_hour``.

        Returns:
            The habit profile; a default ``LearningHabit`` when there
            are no records.
        """
        if not usage_data:
            return LearningHabit()

        daily_minutes: Dict[Any, float] = {}
        hourly_counts: Dict[int, int] = {}
        for record in usage_data:
            day = record.get("date", "")
            minutes = record.get("duration_minutes", 0)
            hour = record.get("start_hour", 0)

            daily_minutes[day] = daily_minutes.get(day, 0) + minutes
            hourly_counts[hour] = hourly_counts.get(hour, 0) + 1

        # usage_data is non-empty here, so both dicts have entries.
        daily_totals = list(daily_minutes.values())
        avg_daily = sum(daily_totals) / len(daily_totals)
        peak_hour = max(hourly_counts, key=hourly_counts.get, default=0)

        return LearningHabit(
            avg_daily_minutes=round(avg_daily, 1),
            peak_study_hour=peak_hour,
            weekly_pattern=self._weekly_pattern(daily_minutes),
            consistency_score=round(
                self._consistency_score(daily_totals), 3
            ),
        )

    def _analyze_writing_ability(
        self, writing_data: List[Dict[str, Any]]
    ) -> WritingAbility:
        """Assess handwriting ability from writing records.

        Averages the ``stroke_order_score``, ``quality_score``, ``speed``
        and ``structure_score`` fields (each missing value counts as 0)
        and derives the trend from the quality scores by comparing the
        later half of the records with the earlier half.

        NOTE(review): the trend assumes ``writing_data`` is ordered
        oldest first - confirm against the data source.

        Returns:
            The assessment; a default ``WritingAbility`` when there are
            no records.
        """
        if not writing_data:
            return WritingAbility()

        def mean_of(key: str) -> float:
            values = [entry.get(key, 0) for entry in writing_data]
            return sum(values) / len(values)

        quality_scores = [
            entry.get("quality_score", 0) for entry in writing_data
        ]

        return WritingAbility(
            stroke_order_accuracy=round(mean_of("stroke_order_score"), 1),
            writing_quality=round(mean_of("quality_score"), 1),
            writing_speed=round(mean_of("speed"), 1),
            char_structure_score=round(mean_of("structure_score"), 1),
            improvement_trend=self._trend_label(quality_scores),
        )

    def _analyze_score_trends(
        self,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
    ) -> List[ScoreTrend]:
        """Build the date-ordered score-trend timeline.

        Every homework and exam record becomes one ``ScoreTrend`` whose
        date is the first ten characters of ``submitted_at`` /
        ``exam_date``. The field may be text, a ``date`` / ``datetime``
        object or missing (which yields an empty date).

        Returns:
            Trend points sorted by date; ties keep homework before exams.
        """
        sources = (
            (homework_data, "submitted_at", "homework"),
            (exam_data, "exam_date", "exam"),
        )

        trends: List[ScoreTrend] = []
        for records, date_key, exam_type in sources:
            for record in records:
                trends.append(ScoreTrend(
                    date=self._timestamp_text(record.get(date_key))[:10],
                    score=round(self._normalize_score(record), 1),
                    subject=record.get("subject", ""),
                    exam_type=exam_type,
                ))

        trends.sort(key=lambda point: point.date)
        return trends

    # ------------------------------------------------------------------
    # Scoring, ranking, persistence
    # ------------------------------------------------------------------

    def _calculate_overall_score(
        self,
        subject_abilities: List[SubjectAbility],
        learning_habit: LearningHabit,
        writing_ability: WritingAbility,
    ) -> float:
        """Compute the overall score on the 100-point scale.

        Weighted formula:
            overall = homework x 0.30 + exam x 0.35 + practice x 0.15
                      + writing x 0.10 + habit x 0.10

        Homework, exam and practice all use the mean of the per-subject
        ``overall_score`` values, because no separate value is available
        for each of them here.

        Returns:
            The weighted score clamped to the range [0, 100].
        """
        if subject_abilities:
            academic_avg = sum(
                ability.overall_score for ability in subject_abilities
            ) / len(subject_abilities)
        else:
            academic_avg = 0.0

        # Writing quality is used as-is; the consistency score is a
        # 0-1 value and is scaled up to the 100-point range.
        writing_score = writing_ability.writing_quality
        habit_score = learning_habit.consistency_score * 100

        overall = (
            academic_avg * (self.WEIGHT_HOMEWORK_SCORE + self.WEIGHT_EXAM_SCORE)
            + academic_avg * self.WEIGHT_PRACTICE
            + writing_score * self.WEIGHT_WRITING
            + habit_score * self.WEIGHT_HABIT
        )

        return min(100.0, max(0.0, overall))

    async def _calculate_rankings(
        self,
        student_id: str,
        class_id: str,
        grade: str,
        score: float,
    ) -> Tuple[int, int, float]:
        """Compute class rank, grade rank and grade percentile.

        Placeholder: the intended ClickHouse-based calculation is
        sketched below; ``(0, 0, 0.0)`` is returned until it is wired up.

        Returns:
            ``(rank_in_class, rank_in_grade, percentile)``.
        """
        # Scores of the students in the same class:
        # class_scores = await query_class_scores(class_id)
        # class_rank = sum(1 for s in class_scores if s > score) + 1

        # Scores of the students in the same grade:
        # grade_scores = await query_grade_scores(grade)
        # grade_rank = sum(1 for s in grade_scores if s > score) + 1
        # percentile = (1 - grade_rank / max(len(grade_scores), 1)) * 100

        return 0, 0, 0.0

    async def _save_profile(self, profile: ComprehensiveProfile) -> None:
        """Write the profile to the ClickHouse wide table.

        Placeholder: the intended insert is sketched below; nothing is
        written yet.
        """
        # clickhouse_client.execute(
        #     "INSERT INTO student_profile VALUES",
        #     [profile_to_row(profile)],
        # )
        return None
|
||||
Reference in New Issue
Block a user