software copyright

This commit is contained in:
jiahong
2026-03-22 15:24:40 +08:00
parent e303bb868a
commit 60f336e345
155 changed files with 127262 additions and 0 deletions
@@ -0,0 +1,541 @@
# 自然写教学数据分析与学情诊断系统软件 V1.0
# analytics/student_profiler.py - 学生画像分析引擎
import logging
import math
from typing import Any, Dict, List, Optional, Tuple
from datetime import datetime, date, timedelta
from dataclasses import dataclass, field
# Module-level logger shared by everything in this file.
logger = logging.getLogger("writech.analytics.profiler")
# ============================================================
# 画像分析数据模型
# ============================================================
@dataclass
class ScoreTrend:
    """One data point on a student's score timeline.

    Attributes:
        date: Date label of the data point, stored as a string.
        score: Score value of the data point.
        subject: Subject the score belongs to.
        exam_type: Origin of the score: "homework", "exam" or "practice".
            Defaults to the empty string.
    """

    date: str
    score: float
    subject: str
    exam_type: str = ""
@dataclass
class SubjectAbility:
    """Ability assessment for a single subject.

    Attributes:
        subject: Subject name.
        overall_score: Average score for the subject.
        knowledge_coverage: Knowledge-point coverage ratio.
        practice_frequency: Practice frequency, in sessions per week.
        improvement_rate: Rate of improvement.
        stability: Score stability; larger means less spread. The profiler
            in this file computes it as ``1 / (1 + std_dev / 10)``, i.e. it
            is derived from the standard deviation, not the raw variance.
    """

    subject: str
    overall_score: float = 0.0
    knowledge_coverage: float = 0.0
    practice_frequency: float = 0.0
    improvement_rate: float = 0.0
    stability: float = 0.0
@dataclass
class LearningHabit:
    """Learning-habit portrait of a student.

    Attributes:
        avg_daily_minutes: Average study time per day, in minutes.
        peak_study_hour: Hour of day at which study activity peaks.
        weekly_pattern: Study duration for Monday through Sunday. Each
            instance gets its own list (empty by default).
        consistency_score: Score describing how regular the study routine is.
        homework_timeliness: Rate of homework submitted on time.
    """

    avg_daily_minutes: float = 0.0
    peak_study_hour: int = 0
    weekly_pattern: List[float] = field(default_factory=list)
    consistency_score: float = 0.0
    homework_timeliness: float = 0.0
@dataclass
class WritingAbility:
    """Handwriting ability assessment.

    Attributes:
        stroke_order_accuracy: Stroke-order correctness rate.
        writing_quality: How well the handwriting conforms to the standard.
        writing_speed: Writing speed, in characters per minute.
        char_structure_score: Score for character shape / structure.
        improvement_trend: Progress trend label; defaults to "stable".
    """

    stroke_order_accuracy: float = 0.0
    writing_quality: float = 0.0
    writing_speed: float = 0.0
    char_structure_score: float = 0.0
    improvement_trend: str = "stable"
@dataclass
class ComprehensiveProfile:
    """Comprehensive learning-status profile of one student.

    Attributes:
        student_id: Student identifier.
        student_name: Student display name.
        class_id: Identifier of the student's class.
        grade: Grade the student belongs to.
        school_id: Identifier of the student's school.
        overall_score: Overall (composite) score.
        rank_in_class: Rank within the class.
        rank_in_grade: Rank within the grade.
        percentile: Percentile position.
        subject_abilities: Per-subject ability assessments.
        learning_habit: Learning-habit portrait, or ``None`` if not analyzed.
        writing_ability: Handwriting assessment, or ``None`` if not analyzed.
        score_trends: Score timeline data points.
        analyzed_at: Time the analysis was produced, stored as a string.
    """

    # Identity
    student_id: str
    student_name: str
    class_id: str
    grade: str
    school_id: str

    # Composite score and ranking
    overall_score: float = 0.0
    rank_in_class: int = 0
    rank_in_grade: int = 0
    percentile: float = 0.0

    # Per-subject abilities
    subject_abilities: List[SubjectAbility] = field(default_factory=list)

    # Learning habits
    learning_habit: Optional[LearningHabit] = None

    # Handwriting ability
    writing_ability: Optional[WritingAbility] = None

    # Score trends
    score_trends: List[ScoreTrend] = field(default_factory=list)

    # Analysis timestamp
    analyzed_at: str = ""
# ============================================================
# 画像分析引擎
# ============================================================
class StudentProfiler:
    """
    Student profile analysis engine.

    Capabilities:
        1. Overall learning-status score calculation
        2. Multi-dimensional per-subject ability assessment
        3. Learning-habit analysis
        4. Handwriting ability assessment
        5. Score trend extraction (no prediction is implemented here)
        6. Class / grade ranking

    NOTE: the ``_fetch_*`` methods, ``_calculate_rankings`` and
    ``_save_profile`` are placeholders in this file: they return empty /
    zero values and perform no storage access.
    """

    # Weight of each dimension in the overall score (the five sum to 1.0).
    WEIGHT_HOMEWORK_SCORE = 0.30  # homework scores
    WEIGHT_EXAM_SCORE = 0.35      # exam scores
    WEIGHT_PRACTICE = 0.15        # practice performance
    WEIGHT_WRITING = 0.10         # handwriting ability
    WEIGHT_HABIT = 0.10           # learning habits

    # Grading thresholds (not referenced by any method of this class).
    EXCELLENT_THRESHOLD = 90.0
    GOOD_THRESHOLD = 75.0
    PASS_THRESHOLD = 60.0

    def __init__(self):
        """Create the engine; only emits an initialization log message."""
        logger.info("学生画像分析引擎初始化")

    # --------------------------------------------------------------
    # Small shared helpers
    # --------------------------------------------------------------
    @staticmethod
    def _normalize_score(record: Dict[str, Any]) -> float:
        """Convert a record's ``score`` / ``total_score`` to a 0-100 scale.

        Missing or ``None`` values fall back to ``score=0`` and
        ``total_score=100``. The divisor is floored at 1 so a zero total
        cannot cause a division by zero.
        """
        total = record.get("total_score")
        score = record.get("score")
        if total is None:
            total = 100
        if score is None:
            score = 0
        return (score / max(total, 1)) * 100

    @staticmethod
    def _date_key(value: Any) -> str:
        """Return the first 10 characters ("YYYY-MM-DD") of a date-like value.

        Accepts strings as well as ``date`` / ``datetime`` objects, so the
        result does not depend on what the storage driver returns.
        ``None`` yields the empty string.
        """
        if value is None:
            return ""
        if isinstance(value, date):  # also matches datetime (a subclass)
            return value.isoformat()[:10]
        return str(value)[:10]

    @staticmethod
    def _weekday_of(value: Any) -> Optional[int]:
        """Return the weekday (Monday=0 .. Sunday=6) of a date-like value.

        Returns ``None`` when the value cannot be parsed as an ISO date.
        """
        if isinstance(value, date):
            return value.weekday()
        try:
            return date.fromisoformat(str(value)[:10]).weekday()
        except ValueError:
            return None

    # --------------------------------------------------------------
    # Entry point
    # --------------------------------------------------------------
    async def build_profile(
        self,
        student_id: str,
        student_info: Dict[str, Any],
        period_days: int = 30,
    ) -> ComprehensiveProfile:
        """
        Build the comprehensive profile of one student.

        Args:
            student_id: Student identifier.
            student_info: Basic student info; the keys ``name``,
                ``class_id``, ``grade`` and ``school_id`` are read, each
                defaulting to the empty string when absent.
            period_days: Length of the analysis window in days, counted
                back from today.

        Returns:
            The assembled profile (also handed to ``_save_profile``).
        """
        logger.info(
            "构建学生画像: %s, 分析周期=%d", student_id, period_days
        )
        end_date = date.today()
        start_date = end_date - timedelta(days=period_days)

        # 1. Fetch raw data for the window.
        homework_data = await self._fetch_homework_data(
            student_id, start_date, end_date
        )
        exam_data = await self._fetch_exam_data(
            student_id, start_date, end_date
        )
        practice_data = await self._fetch_practice_data(
            student_id, start_date, end_date
        )
        writing_data = await self._fetch_writing_data(
            student_id, start_date, end_date
        )
        usage_data = await self._fetch_usage_data(
            student_id, start_date, end_date
        )

        # 2. Analyze each dimension.
        subject_abilities = self._analyze_subject_abilities(
            homework_data, exam_data, practice_data
        )
        learning_habit = self._analyze_learning_habit(usage_data)
        writing_ability = self._analyze_writing_ability(writing_data)
        score_trends = self._analyze_score_trends(homework_data, exam_data)

        # 3. Composite score.
        overall_score = self._calculate_overall_score(
            subject_abilities, learning_habit, writing_ability
        )

        # 4. Rankings.
        class_id = student_info.get("class_id", "")
        grade = student_info.get("grade", "")
        rank_in_class, rank_in_grade, percentile = (
            await self._calculate_rankings(
                student_id, class_id, grade, overall_score
            )
        )

        profile = ComprehensiveProfile(
            student_id=student_id,
            student_name=student_info.get("name", ""),
            class_id=class_id,
            grade=grade,
            school_id=student_info.get("school_id", ""),
            overall_score=round(overall_score, 1),
            rank_in_class=rank_in_class,
            rank_in_grade=rank_in_grade,
            percentile=round(percentile, 1),
            subject_abilities=subject_abilities,
            learning_habit=learning_habit,
            writing_ability=writing_ability,
            score_trends=score_trends,
            analyzed_at=datetime.now().isoformat(),
        )

        # 5. Persist to the ClickHouse profile wide table.
        await self._save_profile(profile)
        logger.info(
            "画像构建完成: %s, 综合评分=%.1f, 班级排名=%d",
            student_id, overall_score, rank_in_class,
        )
        return profile

    # --------------------------------------------------------------
    # Data access (placeholders)
    # --------------------------------------------------------------
    async def _fetch_homework_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch homework score records from ClickHouse.

        Placeholder: currently always returns an empty list.
        """
        # TODO: intended query (not wired up yet):
        #   SELECT subject, score, total_score, submitted_at, is_on_time
        #   FROM homework_submissions
        #   WHERE student_id = %(sid)s
        #     AND submitted_at BETWEEN %(start)s AND %(end)s
        #   ORDER BY submitted_at
        # with parameters {"sid": student_id, "start": str(start),
        # "end": str(end)}.
        return []

    async def _fetch_exam_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch exam score records from ClickHouse.

        Placeholder: currently always returns an empty list.
        """
        return []

    async def _fetch_practice_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch practice (copybook / stroke-order) records.

        Placeholder: currently always returns an empty list.
        """
        return []

    async def _fetch_writing_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch handwriting quality score records.

        Placeholder: currently always returns an empty list.
        """
        return []

    async def _fetch_usage_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch application usage-duration records.

        Placeholder: currently always returns an empty list.
        """
        return []

    # --------------------------------------------------------------
    # Analysis
    # --------------------------------------------------------------
    def _analyze_subject_abilities(
        self,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
        practice_data: List[Dict[str, Any]],
    ) -> List[SubjectAbility]:
        """
        Assess ability per subject from homework and exam records.

        Computed for every subject that has at least one score:
            - overall_score: mean of the normalized (0-100) scores
            - stability: ``1 / (1 + std_dev / 10)``, in (0, 1]
            - improvement_rate: mean of the later half of the scores minus
              the mean of the earlier half, in chronological order

        ``knowledge_coverage`` and ``practice_frequency`` are left at their
        defaults; ``practice_data`` is accepted but not used yet.

        Args:
            homework_data: Records with ``subject``, ``score``,
                ``total_score`` and ``submitted_at``.
            exam_data: Records with ``subject``, ``score``, ``total_score``
                and ``exam_date``.
            practice_data: Currently unused.

        Returns:
            One ``SubjectAbility`` per subject, in first-seen order.
        """
        # subject -> [(date key, normalized score), ...]
        by_subject: Dict[str, List[Tuple[str, float]]] = {}
        sources = (
            (homework_data, "submitted_at"),
            (exam_data, "exam_date"),
        )
        for records, date_field in sources:
            for record in records:
                subject = record.get("subject", "unknown")
                by_subject.setdefault(subject, []).append((
                    self._date_key(record.get(date_field)),
                    self._normalize_score(record),
                ))

        abilities: List[SubjectAbility] = []
        for subject, entries in by_subject.items():
            if not entries:
                continue
            # Homework and exam records are interleaved by date so that the
            # first-half / second-half split below really means
            # "earlier vs. later". The sort is stable, so records without a
            # usable date keep their relative order (and sort first).
            entries.sort(key=lambda entry: entry[0])
            scores = [score for _, score in entries]
            count = len(scores)

            avg_score = sum(scores) / count

            # Population standard deviation mapped into (0, 1].
            variance = sum((s - avg_score) ** 2 for s in scores) / count
            stability = 1.0 / (1.0 + math.sqrt(variance) / 10)

            # Improvement needs at least two scores to form two halves.
            mid = count // 2
            if mid > 0:
                first_half_avg = sum(scores[:mid]) / mid
                second_half_avg = sum(scores[mid:]) / (count - mid)
                improvement = second_half_avg - first_half_avg
            else:
                improvement = 0.0

            abilities.append(SubjectAbility(
                subject=subject,
                overall_score=round(avg_score, 1),
                stability=round(stability, 3),
                improvement_rate=round(improvement, 1),
            ))
        return abilities

    def _analyze_learning_habit(
        self, usage_data: List[Dict[str, Any]]
    ) -> LearningHabit:
        """
        Analyze learning habits from usage records.

        Each record is read for ``date``, ``duration_minutes`` and
        ``start_hour``. Computed dimensions:
            - avg_daily_minutes: mean minutes per day that has records
            - peak_study_hour: start hour with the most records
            - weekly_pattern: average daily minutes for Monday..Sunday
              (7 values; empty when no record date can be parsed)
            - consistency_score: ``max(0, 1 - cv)`` where ``cv`` is the
              coefficient of variation of the daily totals

        ``homework_timeliness`` is not derived here and keeps its default.

        Returns:
            A default ``LearningHabit`` when ``usage_data`` is empty.
        """
        if not usage_data:
            return LearningHabit()

        daily_minutes: Dict[Any, float] = {}
        hourly_counts: Dict[int, int] = {}
        for record in usage_data:
            day = record.get("date", "")
            minutes = record.get("duration_minutes", 0) or 0
            hour = record.get("start_hour", 0) or 0
            daily_minutes[day] = daily_minutes.get(day, 0) + minutes
            hourly_counts[hour] = hourly_counts.get(hour, 0) + 1

        totals = list(daily_minutes.values())
        mean_daily = sum(totals) / len(totals)

        peak_hour = max(hourly_counts, key=hourly_counts.get, default=0)

        # Group the per-day totals by weekday (Monday=0 .. Sunday=6).
        weekday_minutes: Dict[int, List[float]] = {i: [] for i in range(7)}
        for day, minutes in daily_minutes.items():
            weekday = self._weekday_of(day)
            if weekday is not None:
                weekday_minutes[weekday].append(minutes)
        if any(weekday_minutes.values()):
            weekly_pattern = [
                round(sum(vals) / len(vals), 1) if vals else 0.0
                for vals in (weekday_minutes[i] for i in range(7))
            ]
        else:
            weekly_pattern = []

        # Regularity: the smaller the coefficient of variation of the daily
        # totals, the more regular the routine. The mean is floored at 1 to
        # avoid dividing by (near) zero.
        variance = sum((v - mean_daily) ** 2 for v in totals) / len(totals)
        cv = math.sqrt(variance) / max(mean_daily, 1)
        consistency = max(0.0, 1.0 - cv)

        return LearningHabit(
            avg_daily_minutes=round(mean_daily, 1),
            peak_study_hour=peak_hour,
            weekly_pattern=weekly_pattern,
            consistency_score=round(consistency, 3),
        )

    def _analyze_writing_ability(
        self, writing_data: List[Dict[str, Any]]
    ) -> WritingAbility:
        """
        Assess handwriting ability.

        Averages ``stroke_order_score``, ``quality_score``, ``speed`` and
        ``structure_score`` over all records (missing keys count as 0).
        The trend compares the mean quality score of the second half of the
        records against the first half, in the order given: a gain of more
        than 3 points is "improving", a drop of more than 3 is "declining",
        anything else is "stable".

        Returns:
            A default ``WritingAbility`` when ``writing_data`` is empty.
        """
        if not writing_data:
            return WritingAbility()

        count = len(writing_data)

        def mean_of(key: str) -> float:
            return sum(d.get(key, 0) for d in writing_data) / count

        quality_scores = [d.get("quality_score", 0) for d in writing_data]

        trend = "stable"
        mid = count // 2
        if mid > 0:
            early_avg = sum(quality_scores[:mid]) / mid
            recent_avg = sum(quality_scores[mid:]) / (count - mid)
            if recent_avg - early_avg > 3:
                trend = "improving"
            elif early_avg - recent_avg > 3:
                trend = "declining"

        return WritingAbility(
            stroke_order_accuracy=round(mean_of("stroke_order_score"), 1),
            writing_quality=round(sum(quality_scores) / count, 1),
            writing_speed=round(mean_of("speed"), 1),
            char_structure_score=round(mean_of("structure_score"), 1),
            improvement_trend=trend,
        )

    def _analyze_score_trends(
        self,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
    ) -> List[ScoreTrend]:
        """Build the score timeline from homework and exam records.

        Homework points take their date from ``submitted_at`` and are
        tagged "homework"; exam points take it from ``exam_date`` and are
        tagged "exam". Dates may be strings or ``date`` / ``datetime``
        objects. The result is sorted by date string, ascending.
        """
        sources = (
            (homework_data, "submitted_at", "homework"),
            (exam_data, "exam_date", "exam"),
        )
        trends: List[ScoreTrend] = [
            ScoreTrend(
                date=self._date_key(record.get(date_field)),
                score=round(self._normalize_score(record), 1),
                subject=record.get("subject", ""),
                exam_type=kind,
            )
            for records, date_field, kind in sources
            for record in records
        ]
        # ISO date strings sort chronologically as plain strings.
        trends.sort(key=lambda t: t.date)
        return trends

    def _calculate_overall_score(
        self,
        subject_abilities: List[SubjectAbility],
        learning_habit: LearningHabit,
        writing_ability: WritingAbility,
    ) -> float:
        """
        Compute the composite score on a 0-100 scale.

        Formula:
            overall = academic x (0.30 + 0.35) + academic x 0.15
                      + writing x 0.10 + habit x 0.10

        where ``academic`` is the mean ``overall_score`` across subjects
        (0 when there are none), ``writing`` is
        ``writing_ability.writing_quality`` and ``habit`` is
        ``learning_habit.consistency_score x 100``. Homework, exam and
        practice all use the same academic average, because no separate
        per-source averages are available at this point.

        Returns:
            The weighted sum, clamped to [0, 100].
        """
        if subject_abilities:
            academic_avg = sum(
                a.overall_score for a in subject_abilities
            ) / len(subject_abilities)
        else:
            academic_avg = 0.0

        # Handwriting score is used as-is (treated as a percentage).
        writing_score = writing_ability.writing_quality
        # Habit score: regularity (0-1) scaled to a percentage.
        habit_score = learning_habit.consistency_score * 100

        overall = (
            academic_avg
            * (self.WEIGHT_HOMEWORK_SCORE + self.WEIGHT_EXAM_SCORE)
            + academic_avg * self.WEIGHT_PRACTICE
            + writing_score * self.WEIGHT_WRITING
            + habit_score * self.WEIGHT_HABIT
        )
        return min(100.0, max(0.0, overall))

    async def _calculate_rankings(
        self,
        student_id: str,
        class_id: str,
        grade: str,
        score: float,
    ) -> Tuple[int, int, float]:
        """
        Compute class rank, grade rank and grade percentile.

        Placeholder: currently always returns ``(0, 0, 0.0)`` and ignores
        all arguments.

        Returns:
            ``(rank_in_class, rank_in_grade, percentile)``.
        """
        # TODO: intended implementation once score queries exist:
        #   class_scores = await query_class_scores(class_id)
        #   class_rank = sum(1 for s in class_scores if s > score) + 1
        #   grade_scores = await query_grade_scores(grade)
        #   grade_rank = sum(1 for s in grade_scores if s > score) + 1
        #   percentile = (1 - grade_rank / max(len(grade_scores), 1)) * 100
        return 0, 0, 0.0

    async def _save_profile(self, profile: ComprehensiveProfile) -> None:
        """Write the profile to the ClickHouse profile wide table.

        Placeholder: currently does nothing.
        """
        # TODO: intended implementation:
        #   clickhouse_client.execute(
        #       "INSERT INTO student_profile VALUES",
        #       [profile_to_row(profile)],
        #   )
        return None