software copyright

This commit is contained in:
jiahong
2026-03-22 15:24:40 +08:00
parent e303bb868a
commit 60f336e345
155 changed files with 127262 additions and 0 deletions
@@ -0,0 +1,365 @@
# 自然写教学数据分析与学情诊断系统软件 V1.0
# analytics/knowledge_graph.py - Neo4j知识图谱查询与推理引擎
import logging
from typing import Any, Dict, List, Optional, Tuple
from dataclasses import dataclass, field
logger = logging.getLogger("writech.analytics.knowledge_graph")
# ============================================================
# 知识图谱数据模型
# ============================================================
@dataclass
class KnowledgeNode:
    """A knowledge point, i.e. one vertex of the knowledge graph.

    Attributes:
        node_id: Identifier of the knowledge point.
        name: Name of the knowledge point.
        subject: Subject the point belongs to.
        grade: Grade the point belongs to.
        chapter: Chapter label; defaults to an empty string.
        section: Section label; defaults to an empty string.
        difficulty: Difficulty coefficient, documented as 0-1 (not validated).
        importance: Importance level, documented as 0-1 (not validated).
        description: Free-text description.
    """

    node_id: str
    name: str
    subject: str
    grade: str
    chapter: str = ""
    section: str = ""
    difficulty: float = 0.5
    importance: float = 0.5
    description: str = ""
@dataclass
class KnowledgeEdge:
    """A directed relationship between two knowledge points.

    Attributes:
        source_id: Identifier of the node the edge starts from.
        target_id: Identifier of the node the edge points to.
        relation_type: Kind of relationship; the documented values are
            ``prerequisite``, ``includes`` and ``related``.
        weight: Edge weight; defaults to 1.0.
    """

    source_id: str
    target_id: str
    relation_type: str
    weight: float = 1.0
@dataclass
class StudentMastery:
    """How well one student has mastered one knowledge point.

    Attributes:
        student_id: Identifier of the student.
        knowledge_id: Identifier of the knowledge point.
        mastery_level: Mastery level, documented as 0-1 (not validated).
        practice_count: Number of practice attempts.
        correct_count: Number of correct attempts.
        error_count: Number of wrong attempts.
        last_practice: Time of the last practice as a string; empty by default.
    """

    student_id: str
    knowledge_id: str
    mastery_level: float = 0.0
    practice_count: int = 0
    correct_count: int = 0
    error_count: int = 0
    last_practice: str = ""
@dataclass
class ErrorAttribution:
    """Attribution result for one wrongly answered question.

    Attributes:
        question_id: Identifier of the question.
        error_knowledge_ids: Knowledge points directly linked to the question.
        root_cause_ids: Root-cause knowledge points, i.e. prerequisites that
            are themselves not mastered.
        suggestion: Study advice text; empty by default.
    """

    question_id: str
    error_knowledge_ids: List[str]
    root_cause_ids: List[str]
    suggestion: str = ""
# ============================================================
# 知识图谱引擎
# ============================================================
class KnowledgeGraphEngine:
    """Query and reasoning engine for a Neo4j-backed knowledge graph.

    Responsibilities:
        1. Query and traverse the knowledge-point graph.
        2. Attribute wrong answers to unmastered prerequisite knowledge.
        3. Recommend a learning path towards a target knowledge point.
        4. Aggregate mastery per chapter.

    NOTE: the Neo4j driver calls are still commented out in this class, so
    the query methods prepare their Cypher text but return empty results.
    """

    # Mastery levels strictly below this value count as "not mastered".
    MASTERY_THRESHOLD = 0.6

    def __init__(self, uri: str, user: str, password: str):
        """Store the connection settings; no driver is opened yet.

        Args:
            uri: Neo4j connection URI.
            user: Neo4j user name.
            password: Neo4j password.
        """
        self.uri = uri
        self.user = user
        self.password = password
        # self._driver = GraphDatabase.driver(uri, auth=(user, password))
        logger.info("知识图谱引擎初始化: %s", uri)

    async def query_subject_graph(
        self, subject: str, grade: Optional[str] = None
    ) -> Tuple[List[KnowledgeNode], List[KnowledgeEdge]]:
        """Fetch the knowledge graph of one subject.

        Args:
            subject: Subject name matched against ``KnowledgePoint.subject``.
            grade: Optional grade filter; ``None`` disables the filter.

        Returns:
            ``(nodes, edges)``. Both lists stay empty until the commented-out
            driver code below is enabled.
        """
        logger.info("查询知识图谱: subject=%s, grade=%s", subject, grade)

        # Cypher: all knowledge points of the subject (optionally one grade).
        node_query = """
            MATCH (k:KnowledgePoint {subject: $subject})
            WHERE ($grade IS NULL OR k.grade = $grade)
            RETURN k.id AS id, k.name AS name, k.subject AS subject,
            k.grade AS grade, k.chapter AS chapter, k.section AS section,
            k.difficulty AS difficulty, k.importance AS importance,
            k.description AS description
            ORDER BY k.chapter, k.section
        """
        # Cypher: all relationships leaving those knowledge points.
        edge_query = """
            MATCH (a:KnowledgePoint {subject: $subject})-[r]->(b:KnowledgePoint)
            WHERE ($grade IS NULL OR a.grade = $grade)
            RETURN a.id AS source, b.id AS target, type(r) AS relation,
            r.weight AS weight
        """

        nodes: List[KnowledgeNode] = []
        edges: List[KnowledgeEdge] = []
        # async with self._driver.async_session() as session:
        #     # fetch nodes
        #     result = await session.run(node_query, subject=subject, grade=grade)
        #     async for record in result:
        #         nodes.append(KnowledgeNode(
        #             node_id=record["id"],
        #             name=record["name"],
        #             ...
        #         ))
        #
        #     # fetch edges
        #     result = await session.run(edge_query, subject=subject, grade=grade)
        #     async for record in result:
        #         edges.append(KnowledgeEdge(
        #             source_id=record["source"],
        #             target_id=record["target"],
        #             relation_type=record["relation"],
        #             weight=record["weight"] or 1.0,
        #         ))
        logger.info(
            "图谱查询完成: %d节点, %d", len(nodes), len(edges)
        )
        return nodes, edges

    @staticmethod
    def _build_prerequisite_query(max_depth: int) -> str:
        """Return the Cypher text that walks PREREQUISITE edges backwards.

        Cypher does not accept a parameter as the bound of a variable-length
        pattern (``*1..$depth``), so the depth is inlined as a literal. It is
        converted with ``int()`` first, which also keeps arbitrary text out
        of the query.

        Args:
            max_depth: Maximum number of hops; must be at least 1.

        Raises:
            ValueError: If ``max_depth`` is not an integer value >= 1.
        """
        depth = int(max_depth)
        if depth < 1:
            raise ValueError("max_depth must be >= 1")
        return (
            "\n"
            "MATCH path = (target:KnowledgePoint {id: $kid})\n"
            f"<-[:PREREQUISITE*1..{depth}]-(prereq:KnowledgePoint)\n"
            "RETURN prereq.id AS id, prereq.name AS name,\n"
            "prereq.subject AS subject, prereq.grade AS grade,\n"
            "prereq.chapter AS chapter, prereq.difficulty AS difficulty,\n"
            "length(path) AS distance\n"
            "ORDER BY distance ASC\n"
        )

    async def query_prerequisites(
        self, knowledge_id: str, max_depth: int = 3
    ) -> List[KnowledgeNode]:
        """Look up the prerequisite chain of a knowledge point.

        Used by error attribution: when a knowledge point is not mastered,
        its prerequisites are inspected to find the underlying cause.

        Args:
            knowledge_id: Identifier of the target knowledge point.
            max_depth: Maximum number of PREREQUISITE hops to follow. A value
                below 1 yields an empty result.

        Returns:
            Prerequisite nodes; the query orders them by path length,
            nearest first. Currently always empty because the driver code
            is commented out.
        """
        depth = int(max_depth)
        prerequisites: List[KnowledgeNode] = []
        if depth >= 1:
            query = self._build_prerequisite_query(depth)  # used by the driver code below
            # async with self._driver.async_session() as session:
            #     result = await session.run(query, kid=knowledge_id)
            #     async for record in result:
            #         prerequisites.append(KnowledgeNode(
            #             node_id=record["id"],
            #             name=record["name"],
            #             ...
            #         ))
        logger.debug(
            "知识点 %s 的前驱链: %d",
            knowledge_id,
            len(prerequisites),
        )
        return prerequisites

    async def attribute_errors(
        self,
        student_id: str,
        error_question_ids: List[str],
        mastery_map: Dict[str, float],
    ) -> List[ErrorAttribution]:
        """Attribute each wrong answer to unmastered knowledge.

        For every question:
            1. look up the knowledge points linked to the question
               (lookup not implemented yet, so the list is empty);
            2. for each linked point below ``MASTERY_THRESHOLD``, fetch its
               prerequisites;
            3. every prerequisite that is also below the threshold is
               recorded once as a root cause.

        Args:
            student_id: Identifier of the student (only logged here).
            error_question_ids: Identifiers of the wrongly answered questions.
            mastery_map: ``{knowledge_id: mastery_level}``; missing ids are
                treated as 0.0.

        Returns:
            One ``ErrorAttribution`` per question, in input order.
        """
        logger.info(
            "错题归因: student=%s, 错题数=%d",
            student_id,
            len(error_question_ids),
        )
        threshold = self.MASTERY_THRESHOLD
        # Several questions often share knowledge points; fetch each
        # prerequisite chain only once per call.
        prereq_cache: Dict[str, List[KnowledgeNode]] = {}
        attributions: List[ErrorAttribution] = []

        for question_id in error_question_ids:
            # question_kps = await self._query_question_knowledge(question_id)
            question_kps: List[str] = []
            root_causes: List[str] = []
            seen_roots = set()

            for kp_id in question_kps:
                if mastery_map.get(kp_id, 0.0) >= threshold:
                    continue
                if kp_id not in prereq_cache:
                    prereq_cache[kp_id] = await self.query_prerequisites(kp_id)
                for prereq in prereq_cache[kp_id]:
                    prereq_id = prereq.node_id
                    if prereq_id in seen_roots:
                        continue
                    if mastery_map.get(prereq_id, 0.0) < threshold:
                        seen_roots.add(prereq_id)
                        root_causes.append(prereq_id)

            suggestion = self._generate_suggestion(
                question_kps, root_causes, mastery_map
            )
            attributions.append(ErrorAttribution(
                question_id=question_id,
                error_knowledge_ids=question_kps,
                root_cause_ids=root_causes,
                suggestion=suggestion,
            ))
        return attributions

    def _generate_suggestion(
        self,
        knowledge_ids: List[str],
        root_cause_ids: List[str],
        mastery_map: Dict[str, float],
    ) -> str:
        """Turn an attribution result into a study suggestion.

        Root causes take priority. Otherwise the advice depends on the
        average mastery of the linked knowledge points (missing ids count
        as 0): below 0.3, below ``MASTERY_THRESHOLD``, or at/above it.
        A generic message is returned when there is nothing to evaluate.
        """
        if root_cause_ids:
            return (
                f"建议先复习前驱知识点(共{len(root_cause_ids)}个),"
                f"夯实基础后再针对性练习当前知识点"
            )
        if not knowledge_ids:
            return "暂无具体建议"

        avg_mastery = sum(
            mastery_map.get(k, 0) for k in knowledge_ids
        ) / len(knowledge_ids)
        if avg_mastery < 0.3:
            return "该知识点掌握度较低,建议从基础概念开始系统学习"
        if avg_mastery < self.MASTERY_THRESHOLD:
            return "该知识点已有一定基础,建议加强专项练习巩固提升"
        return "知识点掌握较好,本次错误可能是粗心或审题不清"

    async def recommend_learning_path(
        self,
        student_id: str,
        target_knowledge_id: str,
        mastery_map: Dict[str, float],
    ) -> List[KnowledgeNode]:
        """Recommend what to study before a target knowledge point.

        Rules:
            1. only prerequisites below ``MASTERY_THRESHOLD`` are included
               (already mastered ones are skipped);
            2. a prerequisite reachable through several paths is listed once;
            3. the result is ordered by ascending difficulty (stable, so
               equally difficult nodes keep the order of the query result).

        Args:
            student_id: Identifier of the student (only logged here).
            target_knowledge_id: Knowledge point the student wants to reach.
            mastery_map: ``{knowledge_id: mastery_level}``; missing ids are
                treated as 0.0.

        Returns:
            The unmastered prerequisite nodes. The target itself is not
            appended yet (see the commented-out code below).
        """
        all_prereqs = await self.query_prerequisites(
            target_knowledge_id, max_depth=5
        )

        seen_ids = set()
        unmastered: List[KnowledgeNode] = []
        for node in all_prereqs:
            if node.node_id in seen_ids:
                continue
            seen_ids.add(node.node_id)
            if mastery_map.get(node.node_id, 0.0) < self.MASTERY_THRESHOLD:
                unmastered.append(node)

        unmastered.sort(key=lambda n: n.difficulty)

        # Append the target knowledge point itself:
        # target_node = await self._get_knowledge_node(target_knowledge_id)
        # if target_node:
        #     unmastered.append(target_node)
        logger.info(
            "学习路径推荐: student=%s, target=%s, 路径长度=%d",
            student_id,
            target_knowledge_id,
            len(unmastered),
        )
        return unmastered

    async def aggregate_chapter_mastery(
        self,
        student_id: str,
        subject: str,
        mastery_map: Dict[str, float],
    ) -> List[Dict[str, Any]]:
        """Aggregate knowledge-point mastery per chapter.

        Nodes without a chapter are grouped under "其他". Missing mastery
        values count as 0.0.

        Args:
            student_id: Identifier of the student (not used by the
                computation itself).
            subject: Subject whose graph is aggregated.
            mastery_map: ``{knowledge_id: mastery_level}``.

        Returns:
            One dict per chapter, sorted by chapter name, with the keys
            ``chapter``, ``avg_mastery`` (rounded to 3 decimals),
            ``knowledge_count`` and ``mastered_count``.
        """
        nodes, _ = await self.query_subject_graph(subject)

        by_chapter: Dict[str, List[float]] = {}
        for node in nodes:
            by_chapter.setdefault(node.chapter or "其他", []).append(
                mastery_map.get(node.node_id, 0.0)
            )

        return [
            {
                "chapter": chapter,
                "avg_mastery": round(sum(levels) / len(levels), 3),
                "knowledge_count": len(levels),
                "mastered_count": sum(
                    1 for level in levels if level >= self.MASTERY_THRESHOLD
                ),
            }
            for chapter, levels in sorted(
                by_chapter.items(), key=lambda item: item[0]
            )
        ]

    async def close(self) -> None:
        """Close the engine; the driver shutdown is not wired up yet."""
        # await self._driver.close()
        logger.info("知识图谱引擎已关闭")
@@ -0,0 +1,541 @@
# 自然写教学数据分析与学情诊断系统软件 V1.0
# analytics/student_profiler.py - 学生画像分析引擎
import logging
import math
from typing import Any, Dict, List, Optional, Tuple
from datetime import datetime, date, timedelta
from dataclasses import dataclass, field
logger = logging.getLogger("writech.analytics.profiler")
# ============================================================
# 画像分析数据模型
# ============================================================
@dataclass
class ScoreTrend:
    """One data point of a score trend.

    Attributes:
        date: Date of the data point, as a string.
        score: Score value.
        subject: Subject the score belongs to.
        exam_type: Kind of assessment; the documented values are
            ``homework``, ``exam`` and ``practice``. Empty by default.
    """

    date: str
    score: float
    subject: str
    exam_type: str = ""
@dataclass
class SubjectAbility:
    """Ability assessment for a single subject.

    Attributes:
        subject: Subject name.
        overall_score: Overall score for the subject.
        knowledge_coverage: Knowledge-point coverage rate.
        practice_frequency: Practice frequency, in sessions per week.
        improvement_rate: Rate of improvement.
        stability: Stability of the scores; higher means less dispersion.
    """

    subject: str
    overall_score: float = 0.0
    knowledge_coverage: float = 0.0
    practice_frequency: float = 0.0
    improvement_rate: float = 0.0
    stability: float = 0.0
@dataclass
class LearningHabit:
    """Learning-habit portrait of a student.

    Attributes:
        avg_daily_minutes: Average study minutes per day.
        peak_study_hour: Hour of day with the most study activity.
        weekly_pattern: Study durations for Monday through Sunday; a fresh
            empty list per instance by default.
        consistency_score: Score describing how regular the study routine is.
        homework_timeliness: Rate of homework submitted on time.
    """

    avg_daily_minutes: float = 0.0
    peak_study_hour: int = 0
    weekly_pattern: List[float] = field(default_factory=list)
    consistency_score: float = 0.0
    homework_timeliness: float = 0.0
@dataclass
class WritingAbility:
    """Handwriting ability assessment.

    Attributes:
        stroke_order_accuracy: Stroke-order accuracy.
        writing_quality: Writing quality / conformance score.
        writing_speed: Writing speed, in characters per minute.
        char_structure_score: Character-structure score.
        improvement_trend: Trend label; defaults to ``"stable"``.
    """

    stroke_order_accuracy: float = 0.0
    writing_quality: float = 0.0
    writing_speed: float = 0.0
    char_structure_score: float = 0.0
    improvement_trend: str = "stable"
@dataclass
class ComprehensiveProfile:
    """Comprehensive learning profile of one student.

    Attributes:
        student_id: Identifier of the student.
        student_name: Name of the student.
        class_id: Identifier of the student's class.
        grade: Grade of the student.
        school_id: Identifier of the school.
        overall_score: Overall score.
        rank_in_class: Rank within the class.
        rank_in_grade: Rank within the grade.
        percentile: Percentile value.
        subject_abilities: Per-subject ability assessments.
        learning_habit: Learning-habit portrait, or ``None`` when absent.
        writing_ability: Handwriting assessment, or ``None`` when absent.
        score_trends: Score trend data points.
        analyzed_at: Time of the analysis as a string; empty by default.
    """

    # Identity
    student_id: str
    student_name: str
    class_id: str
    grade: str
    school_id: str

    # Overall score and ranking
    overall_score: float = 0.0
    rank_in_class: int = 0
    rank_in_grade: int = 0
    percentile: float = 0.0

    # Per-dimension analysis results
    subject_abilities: List[SubjectAbility] = field(default_factory=list)
    learning_habit: Optional[LearningHabit] = None
    writing_ability: Optional[WritingAbility] = None
    score_trends: List[ScoreTrend] = field(default_factory=list)

    # Analysis timestamp
    analyzed_at: str = ""
# ============================================================
# 画像分析引擎
# ============================================================
class StudentProfiler:
    """Student profile analysis engine.

    Features:
        1. Overall score calculation.
        2. Multi-dimensional ability assessment per subject.
        3. Learning-habit analysis.
        4. Handwriting ability assessment.
        5. Score trend data.
        6. Class / grade ranking (query not implemented yet).

    NOTE: every ``_fetch_*`` method, ``_calculate_rankings`` and
    ``_save_profile`` are still stubs in this class.
    """

    # Dimension weights used by the overall score.
    WEIGHT_HOMEWORK_SCORE = 0.30  # homework results
    WEIGHT_EXAM_SCORE = 0.35      # exam results
    WEIGHT_PRACTICE = 0.15        # practice performance
    WEIGHT_WRITING = 0.10         # handwriting ability
    WEIGHT_HABIT = 0.10           # learning habit

    # Grading thresholds.
    EXCELLENT_THRESHOLD = 90.0
    GOOD_THRESHOLD = 75.0
    PASS_THRESHOLD = 60.0

    def __init__(self):
        """Create the engine; only logs the initialisation."""
        logger.info("学生画像分析引擎初始化")

    async def build_profile(
        self,
        student_id: str,
        student_info: Dict[str, Any],
        period_days: int = 30,
    ) -> ComprehensiveProfile:
        """Build the comprehensive profile of one student.

        Args:
            student_id: Identifier of the student.
            student_info: Basic student information; the keys ``name``,
                ``class_id``, ``grade`` and ``school_id`` are read and
                default to an empty string.
            period_days: Length of the analysis window in days, ending today.

        Returns:
            The assembled profile, after it has been passed to
            ``_save_profile``.
        """
        logger.info(
            "构建学生画像: %s, 分析周期=%d", student_id, period_days
        )
        end_date = date.today()
        start_date = end_date - timedelta(days=period_days)
        class_id = student_info.get("class_id", "")
        grade = student_info.get("grade", "")

        # 1. Raw data
        homework_data = await self._fetch_homework_data(
            student_id, start_date, end_date
        )
        exam_data = await self._fetch_exam_data(
            student_id, start_date, end_date
        )
        practice_data = await self._fetch_practice_data(
            student_id, start_date, end_date
        )
        writing_data = await self._fetch_writing_data(
            student_id, start_date, end_date
        )
        usage_data = await self._fetch_usage_data(
            student_id, start_date, end_date
        )

        # 2. Per-dimension analysis
        subject_abilities = self._analyze_subject_abilities(
            homework_data, exam_data, practice_data
        )
        learning_habit = self._analyze_learning_habit(usage_data)
        writing_ability = self._analyze_writing_ability(writing_data)
        score_trends = self._analyze_score_trends(homework_data, exam_data)

        # 3. Overall score
        overall_score = self._calculate_overall_score(
            subject_abilities, learning_habit, writing_ability
        )

        # 4. Rankings
        rank_in_class, rank_in_grade, percentile = (
            await self._calculate_rankings(
                student_id, class_id, grade, overall_score
            )
        )

        profile = ComprehensiveProfile(
            student_id=student_id,
            student_name=student_info.get("name", ""),
            class_id=class_id,
            grade=grade,
            school_id=student_info.get("school_id", ""),
            overall_score=round(overall_score, 1),
            rank_in_class=rank_in_class,
            rank_in_grade=rank_in_grade,
            percentile=round(percentile, 1),
            subject_abilities=subject_abilities,
            learning_habit=learning_habit,
            writing_ability=writing_ability,
            score_trends=score_trends,
            analyzed_at=datetime.now().isoformat(),
        )

        # 5. Persist to the ClickHouse profile table
        await self._save_profile(profile)
        logger.info(
            "画像构建完成: %s, 综合评分=%.1f, 班级排名=%d",
            student_id, overall_score, rank_in_class,
        )
        return profile

    async def _fetch_homework_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch homework results from ClickHouse (stub: returns ``[]``)."""
        # query = """
        #     SELECT subject, score, total_score, submitted_at, is_on_time
        #     FROM homework_submissions
        #     WHERE student_id = %(sid)s
        #     AND submitted_at BETWEEN %(start)s AND %(end)s
        #     ORDER BY submitted_at
        # """
        # return await clickhouse_query(query, {
        #     "sid": student_id, "start": str(start), "end": str(end)
        # })
        return []

    async def _fetch_exam_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch exam results from ClickHouse (stub: returns ``[]``)."""
        return []

    async def _fetch_practice_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch practice (copybook / stroke-order) data (stub: returns ``[]``)."""
        return []

    async def _fetch_writing_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch writing-quality scores (stub: returns ``[]``)."""
        return []

    async def _fetch_usage_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch application usage durations (stub: returns ``[]``)."""
        return []

    @staticmethod
    def _normalize_score(record: Dict[str, Any]) -> float:
        """Convert ``score`` / ``total_score`` of a record to a 0-100 scale.

        ``total_score`` defaults to 100 and ``score`` to 0; the divisor is
        clamped to at least 1 so a zero total cannot divide by zero.
        """
        total = record.get("total_score", 100)
        score = record.get("score", 0)
        return (score / max(total, 1)) * 100

    @staticmethod
    def _mean(values: List[float]) -> float:
        """Arithmetic mean; an empty list yields 0."""
        return sum(values) / max(len(values), 1)

    def _scores_by_subject(
        self,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
    ) -> Dict[str, List[float]]:
        """Group normalised scores by subject, oldest first.

        Homework is dated by ``submitted_at`` and exams by ``exam_date``.
        Dates are compared as text, which is chronological for ISO-style
        values. Records without a date sort first and otherwise keep their
        input order. Subjects appear in first-seen order (homework before
        exams); a missing subject is grouped under "unknown".
        """
        dated: Dict[str, List[Tuple[str, float]]] = {}
        sources = ((homework_data, "submitted_at"), (exam_data, "exam_date"))
        for records, date_key in sources:
            for record in records:
                subject = record.get("subject", "unknown")
                stamp = str(record.get(date_key) or "")
                dated.setdefault(subject, []).append(
                    (stamp, self._normalize_score(record))
                )
        return {
            subject: [
                score for _, score in sorted(pairs, key=lambda pair: pair[0])
            ]
            for subject, pairs in dated.items()
        }

    @staticmethod
    def _summarize_scores(scores: List[float]) -> Tuple[float, float, float]:
        """Return ``(average, stability, improvement)`` for a score series.

        * stability = 1 / (1 + population std-dev / 10), a value in (0, 1];
        * improvement = mean of the later half minus mean of the earlier
          half (0.0 when there is only one score).

        ``scores`` must be non-empty and in chronological order.
        """
        count = len(scores)
        average = sum(scores) / count
        variance = sum((s - average) ** 2 for s in scores) / count
        stability = 1.0 / (1.0 + math.sqrt(variance) / 10)

        mid = count // 2
        if mid == 0:
            improvement = 0.0
        else:
            earlier = sum(scores[:mid]) / mid
            later = sum(scores[mid:]) / (count - mid)
            improvement = later - earlier
        return average, stability, improvement

    def _analyze_subject_abilities(
        self,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
        practice_data: List[Dict[str, Any]],
    ) -> List[SubjectAbility]:
        """Assess each subject from homework and exam scores.

        Per subject this fills ``overall_score`` (average, 1 decimal),
        ``stability`` (3 decimals) and ``improvement_rate`` (1 decimal).
        Scores are ordered by date before the improvement is computed, so
        it compares later results with earlier ones rather than exams with
        homework.

        ``practice_data`` is accepted but not used yet; knowledge coverage
        and practice frequency keep their defaults.
        """
        abilities: List[SubjectAbility] = []
        timeline = self._scores_by_subject(homework_data, exam_data)
        for subject, scores in timeline.items():
            if not scores:
                continue
            average, stability, improvement = self._summarize_scores(scores)
            abilities.append(SubjectAbility(
                subject=subject,
                overall_score=round(average, 1),
                stability=round(stability, 3),
                improvement_rate=round(improvement, 1),
            ))
        return abilities

    @staticmethod
    def _weekly_pattern(daily_minutes: Dict[Any, float]) -> List[float]:
        """Average the daily totals per weekday, Monday first.

        Keys are read as ISO dates (first 10 characters); keys that cannot
        be parsed are ignored. Returns ``[]`` when no key could be parsed,
        otherwise 7 values rounded to 1 decimal (0.0 for weekdays without
        data).
        """
        buckets: List[List[float]] = [[] for _ in range(7)]
        for day, minutes in daily_minutes.items():
            try:
                weekday = date.fromisoformat(str(day)[:10]).weekday()
            except ValueError:
                continue
            buckets[weekday].append(minutes)
        if not any(buckets):
            return []
        return [
            round(sum(bucket) / len(bucket), 1) if bucket else 0.0
            for bucket in buckets
        ]

    def _analyze_learning_habit(
        self, usage_data: List[Dict[str, Any]]
    ) -> LearningHabit:
        """Analyse learning habits from usage records.

        Each record contributes ``duration_minutes`` to its ``date`` and one
        count to its ``start_hour`` (all default to 0 / empty string).

        Returns:
            A ``LearningHabit`` with the average minutes per active day, the
            most frequent start hour, the weekday pattern and a consistency
            score of ``max(0, 1 - cv)``, where ``cv`` is the std-dev of the
            daily totals divided by ``max(mean, 1)``. An empty input yields
            a default ``LearningHabit``.
        """
        if not usage_data:
            return LearningHabit()

        daily_minutes: Dict[str, float] = {}
        hourly_counts: Dict[int, int] = {}
        for record in usage_data:
            day = record.get("date", "")
            daily_minutes[day] = (
                daily_minutes.get(day, 0) + record.get("duration_minutes", 0)
            )
            hour = record.get("start_hour", 0)
            hourly_counts[hour] = hourly_counts.get(hour, 0) + 1

        totals = list(daily_minutes.values())
        avg_daily = sum(totals) / len(totals)
        peak_hour = max(hourly_counts, key=hourly_counts.get, default=0)

        # Regularity: the smaller the coefficient of variation of the daily
        # totals, the more regular the routine.
        variance = sum((v - avg_daily) ** 2 for v in totals) / len(totals)
        cv = math.sqrt(variance) / max(avg_daily, 1)
        consistency = max(0.0, 1.0 - cv)

        return LearningHabit(
            avg_daily_minutes=round(avg_daily, 1),
            peak_study_hour=peak_hour,
            weekly_pattern=self._weekly_pattern(daily_minutes),
            consistency_score=round(consistency, 3),
        )

    def _analyze_writing_ability(
        self, writing_data: List[Dict[str, Any]]
    ) -> WritingAbility:
        """Assess handwriting ability from writing records.

        Averages ``stroke_order_score``, ``quality_score``, ``speed`` and
        ``structure_score`` (each defaulting to 0). The trend compares the
        mean quality of the later half of the records with the earlier
        half: a difference above 3 points is "improving", below -3 is
        "declining", otherwise "stable". Records are assumed to arrive in
        chronological order - TODO confirm against the data source.
        """
        if not writing_data:
            return WritingAbility()

        def column(key: str) -> List[float]:
            return [entry.get(key, 0) for entry in writing_data]

        quality_scores = column("quality_score")

        trend = "stable"
        mid = len(quality_scores) // 2
        if mid > 0:
            early_avg = sum(quality_scores[:mid]) / mid
            recent_avg = sum(quality_scores[mid:]) / max(
                len(quality_scores) - mid, 1
            )
            if recent_avg - early_avg > 3:
                trend = "improving"
            elif early_avg - recent_avg > 3:
                trend = "declining"

        return WritingAbility(
            stroke_order_accuracy=round(
                self._mean(column("stroke_order_score")), 1
            ),
            writing_quality=round(self._mean(quality_scores), 1),
            writing_speed=round(self._mean(column("speed")), 1),
            char_structure_score=round(
                self._mean(column("structure_score")), 1
            ),
            improvement_trend=trend,
        )

    def _analyze_score_trends(
        self,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
    ) -> List[ScoreTrend]:
        """Build the score trend series, sorted by date.

        Homework points use the first 10 characters of ``submitted_at`` and
        the type "homework"; exam points use ``exam_date`` and the type
        "exam". Date values are converted to text first so non-string
        values do not break the slicing.
        """
        trends: List[ScoreTrend] = []
        sources = (
            (homework_data, "submitted_at", "homework"),
            (exam_data, "exam_date", "exam"),
        )
        for records, date_key, exam_type in sources:
            for record in records:
                trends.append(ScoreTrend(
                    date=str(record.get(date_key) or "")[:10],
                    score=round(self._normalize_score(record), 1),
                    subject=record.get("subject", ""),
                    exam_type=exam_type,
                ))
        trends.sort(key=lambda point: point.date)
        return trends

    def _calculate_overall_score(
        self,
        subject_abilities: List[SubjectAbility],
        learning_habit: LearningHabit,
        writing_ability: WritingAbility,
    ) -> float:
        """Compute the overall score on a 0-100 scale.

        Formula:
            overall = homework x 0.30 + exam x 0.35 + practice x 0.15
                      + writing x 0.10 + habit x 0.10

        The homework, exam and practice terms all use the same academic
        average (mean of the subjects' ``overall_score``), because no
        separate value is available for each. The result is clamped to
        [0, 100].
        """
        if subject_abilities:
            academic_avg = sum(
                ability.overall_score for ability in subject_abilities
            ) / len(subject_abilities)
        else:
            academic_avg = 0.0

        writing_score = writing_ability.writing_quality
        # The consistency score is scaled by 100 to match the other terms.
        habit_score = learning_habit.consistency_score * 100

        overall = (
            academic_avg * (self.WEIGHT_HOMEWORK_SCORE + self.WEIGHT_EXAM_SCORE)
            + academic_avg * self.WEIGHT_PRACTICE
            + writing_score * self.WEIGHT_WRITING
            + habit_score * self.WEIGHT_HABIT
        )
        return min(100.0, max(0.0, overall))

    async def _calculate_rankings(
        self,
        student_id: str,
        class_id: str,
        grade: str,
        score: float,
    ) -> Tuple[int, int, float]:
        """Compute class rank, grade rank and percentile.

        Stub: the ClickHouse queries are not implemented yet, so this
        always returns ``(0, 0, 0.0)``.
        """
        # Scores of the classmates:
        # class_scores = await query_class_scores(class_id)
        # class_rank = sum(1 for s in class_scores if s > score) + 1
        # Scores of the whole grade:
        # grade_scores = await query_grade_scores(grade)
        # grade_rank = sum(1 for s in grade_scores if s > score) + 1
        # percentile = (1 - grade_rank / max(len(grade_scores), 1)) * 100
        return 0, 0, 0.0

    async def _save_profile(self, profile: ComprehensiveProfile) -> None:
        """Write the profile to the ClickHouse profile table (stub: no-op)."""
        # clickhouse_client.execute(
        #     "INSERT INTO student_profile VALUES",
        #     [profile_to_row(profile)],
        # )
        pass
@@ -0,0 +1,460 @@
# 自然写教学数据分析与学情诊断系统软件 V1.0
# analytics/writing_growth.py - 书写能力成长评测引擎
import logging
import math
from typing import Any, Dict, List, Optional, Tuple
from datetime import datetime, date, timedelta
from dataclasses import dataclass, field
logger = logging.getLogger("writech.analytics.writing_growth")
# ============================================================
# 书写成长数据模型
# ============================================================
@dataclass
class WritingSnapshot:
    """Handwriting ability for one time slice.

    Attributes:
        date: Label of the time slice, as a string.
        stroke_order_accuracy: Stroke-order accuracy.
        writing_quality: Writing quality score.
        writing_speed: Writing speed.
        char_structure: Character-structure score.
        practice_count: Number of practice sessions in the slice.
        total_chars: Number of characters written in the slice.
    """

    date: str
    stroke_order_accuracy: float = 0.0
    writing_quality: float = 0.0
    writing_speed: float = 0.0
    char_structure: float = 0.0
    practice_count: int = 0
    total_chars: int = 0
@dataclass
class CharacterProgress:
    """Progress record for the handwriting of a single character.

    Attributes:
        character: The character practised.
        first_score: Score of the first practice.
        latest_score: Score of the most recent practice.
        best_score: Best score achieved.
        practice_count: Number of practice attempts.
        improvement: ``latest_score - first_score``.
        mastery_level: One of ``beginner``, ``intermediate``, ``advanced``
            or ``master``.
    """

    character: str
    first_score: float
    latest_score: float
    best_score: float
    practice_count: int
    improvement: float
    mastery_level: str
@dataclass
class WritingGrowthReport:
    """Handwriting growth assessment report for one student and period.

    Attributes:
        student_id: Identifier of the student.
        period_start: Start of the analysed period, as a string.
        period_end: End of the analysed period, as a string.
        overall_level: Overall level label; empty by default.
        overall_score: Overall score.
        overall_trend: Overall trend label; defaults to ``"stable"``.
        stroke_order_score / stroke_order_trend: Stroke-order score and trend.
        quality_score / quality_trend: Writing-quality score and trend.
        speed_score / speed_trend: Writing-speed score and trend.
        structure_score / structure_trend: Character-structure score and trend.
        snapshots: Time-series snapshots.
        most_improved_chars: Characters ranked by improvement.
        needs_practice_chars: Characters that still need practice.
        total_practice_sessions: Total number of practice sessions.
        total_characters_written: Total number of characters written.
        avg_daily_practice_minutes: Average practice minutes per day.
        analyzed_at: Time the report was generated; empty by default.
    """

    student_id: str
    period_start: str
    period_end: str

    # Overall rating
    overall_level: str = ""
    overall_score: float = 0.0
    overall_trend: str = "stable"

    # Score and trend per dimension
    stroke_order_score: float = 0.0
    stroke_order_trend: str = "stable"
    quality_score: float = 0.0
    quality_trend: str = "stable"
    speed_score: float = 0.0
    speed_trend: str = "stable"
    structure_score: float = 0.0
    structure_trend: str = "stable"

    # Time series
    snapshots: List[WritingSnapshot] = field(default_factory=list)

    # Per-character rankings
    most_improved_chars: List[CharacterProgress] = field(default_factory=list)
    needs_practice_chars: List[CharacterProgress] = field(default_factory=list)

    # Practice statistics
    total_practice_sessions: int = 0
    total_characters_written: int = 0
    avg_daily_practice_minutes: float = 0.0

    # Generation time
    analyzed_at: str = ""
# ============================================================
# 书写成长评测引擎
# ============================================================
class WritingGrowthAnalyzer:
    """Handwriting growth assessment engine.

    Features:
        1. Multi-dimensional scoring (stroke order, quality, speed,
           structure).
        2. Growth trend analysis (exponential moving average to smooth
           noise).
        3. Per-character progress tracking.
        4. Handwriting level rating.

    NOTE: both ``_fetch_*`` methods are still stubs that return no data.
    """

    # Minimum overall score required for each level.
    LEVEL_THRESHOLDS = {
        "精通": 95.0,
        "优秀": 85.0,
        "进阶": 70.0,
        "入门": 50.0,
        "初学": 0.0,
    }
    # Weight of each dimension in the overall score.
    WEIGHTS = {
        "stroke_order": 0.25,
        "quality": 0.35,
        "speed": 0.15,
        "structure": 0.25,
    }

    def __init__(self):
        """Create the analyzer; only logs the initialisation."""
        logger.info("书写成长评测引擎初始化")

    async def analyze_growth(
        self,
        student_id: str,
        start_date: str,
        end_date: str,
        granularity: str = "weekly",
    ) -> WritingGrowthReport:
        """Analyse the handwriting growth of one student.

        Args:
            student_id: Identifier of the student.
            start_date: Start of the period, an ISO date string.
            end_date: End of the period, an ISO date string.
            granularity: ``"weekly"`` or ``"monthly"``; any other value
                groups by the raw date value (daily).

        Returns:
            The growth report. The two character lists are capped at 10
            entries each.

        Raises:
            ValueError: If ``start_date`` or ``end_date`` is not an ISO
                date string (raised by ``datetime.fromisoformat``).
        """
        logger.info(
            "书写成长分析: student=%s, %s~%s, 粒度=%s",
            student_id, start_date, end_date, granularity,
        )
        # 1. Raw writing scores
        raw_data = await self._fetch_writing_scores(
            student_id, start_date, end_date
        )
        # 2. Aggregate per period
        snapshots = self._aggregate_by_period(raw_data, granularity)

        # 3. Score and trend per dimension
        stroke_score, stroke_trend = self._calc_dimension_trend(
            [s.stroke_order_accuracy for s in snapshots]
        )
        quality_score, quality_trend = self._calc_dimension_trend(
            [s.writing_quality for s in snapshots]
        )
        speed_score, speed_trend = self._calc_dimension_trend(
            [s.writing_speed for s in snapshots]
        )
        structure_score, structure_trend = self._calc_dimension_trend(
            [s.char_structure for s in snapshots]
        )

        # 4. Overall score, level and trend
        overall_score = self._calc_overall_score(
            stroke_score, quality_score, speed_score, structure_score
        )
        overall_level = self._determine_level(overall_score)
        overall_trend = self._determine_overall_trend(snapshots)

        # 5. Per-character progress
        char_data = await self._fetch_character_scores(
            student_id, start_date, end_date
        )
        most_improved, needs_practice = self._analyze_char_progress(char_data)

        # 6. Practice statistics
        total_sessions = sum(s.practice_count for s in snapshots)
        total_chars = sum(s.total_chars for s in snapshots)
        period = (
            datetime.fromisoformat(end_date)
            - datetime.fromisoformat(start_date)
        )
        days = max(period.days, 1)
        # Estimate of daily practice minutes: 0.5 minute per character.
        avg_daily = total_chars / days * 0.5

        return WritingGrowthReport(
            student_id=student_id,
            period_start=start_date,
            period_end=end_date,
            overall_level=overall_level,
            overall_score=round(overall_score, 1),
            overall_trend=overall_trend,
            stroke_order_score=round(stroke_score, 1),
            stroke_order_trend=stroke_trend,
            quality_score=round(quality_score, 1),
            quality_trend=quality_trend,
            speed_score=round(speed_score, 1),
            speed_trend=speed_trend,
            structure_score=round(structure_score, 1),
            structure_trend=structure_trend,
            snapshots=snapshots,
            most_improved_chars=most_improved[:10],
            needs_practice_chars=needs_practice[:10],
            total_practice_sessions=total_sessions,
            total_characters_written=total_chars,
            avg_daily_practice_minutes=round(avg_daily, 1),
            analyzed_at=datetime.now().isoformat(),
        )

    async def _fetch_writing_scores(
        self, student_id: str, start: str, end: str
    ) -> List[Dict[str, Any]]:
        """Fetch raw writing scores from ClickHouse (stub: returns ``[]``)."""
        # query = """
        #     SELECT date, stroke_order_accuracy, writing_quality,
        #     writing_speed, char_structure, practice_count, total_chars
        #     FROM writing_growth
        #     WHERE student_id = %(sid)s
        #     AND date BETWEEN %(start)s AND %(end)s
        #     ORDER BY date
        # """
        return []

    async def _fetch_character_scores(
        self, student_id: str, start: str, end: str
    ) -> List[Dict[str, Any]]:
        """Fetch per-character practice scores (stub: returns ``[]``)."""
        # query = """
        #     SELECT character, score, practice_at
        #     FROM practice_records
        #     WHERE student_id = %(sid)s
        #     AND practice_at BETWEEN %(start)s AND %(end)s
        #     ORDER BY character, practice_at
        # """
        return []

    @staticmethod
    def _period_key(date_value: Any, granularity: str) -> Optional[str]:
        """Map a record date to the key of the period it belongs to.

        * ``"weekly"``: ISO date of the Monday of that week, or ``None``
          when the value cannot be parsed as an ISO date;
        * ``"monthly"``: the first 7 characters (``YYYY-MM``);
        * anything else: the value itself.
        """
        if granularity == "weekly":
            try:
                moment = datetime.fromisoformat(str(date_value))
            except ValueError:
                return None
            monday = moment - timedelta(days=moment.weekday())
            return monday.date().isoformat()
        if granularity == "monthly":
            return date_value[:7]
        return date_value

    def _aggregate_by_period(
        self,
        raw_data: List[Dict[str, Any]],
        granularity: str,
    ) -> List[WritingSnapshot]:
        """Aggregate raw writing scores into one snapshot per period.

        The four score dimensions are averaged per period, while
        ``practice_count`` and ``total_chars`` are summed. Snapshots are
        returned sorted by period key.

        In weekly mode a record with a missing or unparseable date cannot
        be assigned to a week; it is skipped (and counted in a warning)
        instead of aborting the whole aggregation.
        """
        if not raw_data:
            return []

        period_map: Dict[str, List[Dict[str, Any]]] = {}
        skipped = 0
        for record in raw_data:
            key = self._period_key(record.get("date", ""), granularity)
            if key is None:
                skipped += 1
                continue
            period_map.setdefault(key, []).append(record)
        if skipped:
            logger.warning(
                "skipped %d writing records without a parseable date", skipped
            )

        snapshots: List[WritingSnapshot] = []
        for period, records in sorted(period_map.items()):
            size = len(records)

            def total(key: str, rows=records):
                return sum(row.get(key, 0) for row in rows)

            snapshots.append(WritingSnapshot(
                date=period,
                stroke_order_accuracy=total("stroke_order_accuracy") / size,
                writing_quality=total("writing_quality") / size,
                writing_speed=total("writing_speed") / size,
                char_structure=total("char_structure") / size,
                practice_count=total("practice_count"),
                total_chars=total("total_chars"),
            ))
        return snapshots

    def _calc_dimension_trend(
        self, values: List[float]
    ) -> Tuple[float, str]:
        """Compute the current score and trend of one dimension.

        The values are smoothed with an exponential moving average
        (smoothing factor 0.3); the last EMA value is the current score.
        With at least 4 values, the mean EMA of the later half is compared
        with the earlier half: more than +3 is "improving", less than -3
        is "declining", otherwise "stable".

        Returns:
            ``(current_score, trend)``; ``(0.0, "stable")`` for no values.
        """
        if not values:
            return 0.0, "stable"

        alpha = 0.3
        ema_values = [values[0]]
        for value in values[1:]:
            ema_values.append(alpha * value + (1 - alpha) * ema_values[-1])
        current_score = ema_values[-1]

        if len(ema_values) < 4:
            return current_score, "stable"

        mid = len(ema_values) // 2
        early_avg = sum(ema_values[:mid]) / mid
        recent_avg = sum(ema_values[mid:]) / (len(ema_values) - mid)
        diff = recent_avg - early_avg
        if diff > 3:
            return current_score, "improving"
        if diff < -3:
            return current_score, "declining"
        return current_score, "stable"

    def _calc_overall_score(
        self,
        stroke: float,
        quality: float,
        speed: float,
        structure: float,
    ) -> float:
        """Return the weighted overall writing score (see ``WEIGHTS``)."""
        return (
            stroke * self.WEIGHTS["stroke_order"]
            + quality * self.WEIGHTS["quality"]
            + speed * self.WEIGHTS["speed"]
            + structure * self.WEIGHTS["structure"]
        )

    def _determine_level(self, score: float) -> str:
        """Map an overall score to its writing level.

        Thresholds are checked from highest to lowest. They are sorted
        explicitly so the result does not depend on the order in which
        ``LEVEL_THRESHOLDS`` is written. Scores below every threshold fall
        back to the lowest level.
        """
        ranked = sorted(
            self.LEVEL_THRESHOLDS.items(),
            key=lambda item: item[1],
            reverse=True,
        )
        for level, threshold in ranked:
            if score >= threshold:
                return level
        return "初学"

    def _determine_overall_trend(
        self, snapshots: List[WritingSnapshot]
    ) -> str:
        """Judge the overall trend from the snapshots.

        Fits a least-squares line through the overall score of each
        snapshot (x = snapshot index). A slope above 0.5 per snapshot is
        "improving", below -0.5 is "declining", otherwise "stable". Fewer
        than two snapshots are always "stable".
        """
        if len(snapshots) < 2:
            return "stable"

        scores = [
            self._calc_overall_score(
                snap.stroke_order_accuracy,
                snap.writing_quality,
                snap.writing_speed,
                snap.char_structure,
            )
            for snap in snapshots
        ]

        count = len(scores)
        x_mean = (count - 1) / 2
        y_mean = sum(scores) / count
        numerator = sum(
            (i - x_mean) * (score - y_mean) for i, score in enumerate(scores)
        )
        denominator = sum((i - x_mean) ** 2 for i in range(count))
        if denominator == 0:
            return "stable"

        slope = numerator / denominator
        if slope > 0.5:
            return "improving"
        if slope < -0.5:
            return "declining"
        return "stable"

    def _analyze_char_progress(
        self, char_data: List[Dict[str, Any]]
    ) -> Tuple[List[CharacterProgress], List[CharacterProgress]]:
        """Analyse the progress of each practised character.

        Records are grouped by ``character`` and ordered by
        ``practice_at``; the first and latest scores are compared.

        Returns:
            ``(most_improved, needs_practice)``:
            * ``most_improved`` - every character, largest improvement
              first;
            * ``needs_practice`` - characters whose latest score is below
              70 after more than 3 practices, lowest score first.
        """
        by_char: Dict[str, List[Tuple[float, str]]] = {}
        for record in char_data:
            by_char.setdefault(record.get("character", ""), []).append(
                (record.get("score", 0.0), record.get("practice_at", ""))
            )

        progress_list: List[CharacterProgress] = []
        for char, entries in by_char.items():
            entries.sort(key=lambda entry: entry[1])  # chronological
            first_score = entries[0][0]
            latest_score = entries[-1][0]

            if latest_score >= 90:
                level = "master"
            elif latest_score >= 75:
                level = "advanced"
            elif latest_score >= 60:
                level = "intermediate"
            else:
                level = "beginner"

            progress_list.append(CharacterProgress(
                character=char,
                first_score=first_score,
                latest_score=latest_score,
                best_score=max(entry[0] for entry in entries),
                practice_count=len(entries),
                improvement=round(latest_score - first_score, 1),
                mastery_level=level,
            ))

        most_improved = sorted(
            progress_list, key=lambda p: p.improvement, reverse=True
        )
        needs_practice = sorted(
            (
                p for p in progress_list
                if p.latest_score < 70 and p.practice_count > 3
            ),
            key=lambda p: p.latest_score,
        )
        return most_improved, needs_practice