software copyright
This commit is contained in:
@@ -0,0 +1,365 @@
|
||||
# 自然写教学数据分析与学情诊断系统软件 V1.0
|
||||
# analytics/knowledge_graph.py - Neo4j知识图谱查询与推理引擎
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
# Module-level logger for the knowledge-graph analytics module.
logger = logging.getLogger("writech.analytics.knowledge_graph")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 知识图谱数据模型
|
||||
# ============================================================
|
||||
|
||||
@dataclass
class KnowledgeNode:
    """A knowledge point, i.e. one node of the knowledge graph."""

    # Identity and classification (all required).
    node_id: str
    name: str
    subject: str
    grade: str

    # Position inside the curriculum (optional).
    chapter: str = ""
    section: str = ""

    # Difficulty coefficient in the range 0-1.
    difficulty: float = 0.5
    # Importance in the range 0-1.
    importance: float = 0.5

    description: str = ""
|
||||
|
||||
|
||||
@dataclass
class KnowledgeEdge:
    """A directed relationship between two knowledge points."""

    source_id: str
    target_id: str
    # One of: prerequisite / includes / related.
    relation_type: str
    weight: float = 1.0
|
||||
|
||||
|
||||
@dataclass
class StudentMastery:
    """How well one student has mastered one knowledge point."""

    student_id: str
    knowledge_id: str

    # Mastery level in the range 0-1.
    mastery_level: float = 0.0

    # Practice counters.
    practice_count: int = 0
    correct_count: int = 0
    error_count: int = 0

    last_practice: str = ""
|
||||
|
||||
|
||||
@dataclass
class ErrorAttribution:
    """Attribution result for one wrongly answered question."""

    question_id: str
    # Knowledge points directly linked to the question.
    error_knowledge_ids: List[str]
    # Root-cause knowledge points (prerequisites that are not mastered).
    root_cause_ids: List[str]
    suggestion: str = ""
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 知识图谱引擎
|
||||
# ============================================================
|
||||
|
||||
class KnowledgeGraphEngine:
    """Query and reasoning engine over a Neo4j knowledge graph.

    Responsibilities:
      1. Querying and traversing the knowledge-point graph.
      2. Error attribution (tracing back to prerequisite knowledge points).
      3. Learning-path recommendation.
      4. Aggregating knowledge-point mastery per chapter.

    NOTE: the Neo4j driver calls are not wired up in this module; they are
    kept as commented-out reference code. Until they are enabled the query
    methods return empty results.
    """

    # Mastery strictly below this level counts as "not mastered".
    MASTERY_THRESHOLD = 0.6
    # Average mastery strictly below this level counts as "weak".
    LOW_MASTERY_THRESHOLD = 0.3

    # Cypher: all knowledge points of a subject, optionally filtered by grade.
    _NODE_QUERY = """
        MATCH (k:KnowledgePoint {subject: $subject})
        WHERE ($grade IS NULL OR k.grade = $grade)
        RETURN k.id AS id, k.name AS name, k.subject AS subject,
               k.grade AS grade, k.chapter AS chapter, k.section AS section,
               k.difficulty AS difficulty, k.importance AS importance,
               k.description AS description
        ORDER BY k.chapter, k.section
    """

    # Cypher: all outgoing relationships of those knowledge points.
    _EDGE_QUERY = """
        MATCH (a:KnowledgePoint {subject: $subject})-[r]->(b:KnowledgePoint)
        WHERE ($grade IS NULL OR a.grade = $grade)
        RETURN a.id AS source, b.id AS target, type(r) AS relation,
               r.weight AS weight
    """

    def __init__(self, uri: str, user: str, password: str):
        """Store the Neo4j connection settings.

        Args:
            uri: Neo4j connection URI.
            user: Neo4j user name.
            password: Neo4j password.
        """
        self.uri = uri
        self.user = user
        self.password = password
        # The driver is not created yet; the attribute exists so that code
        # touching it later does not fail with AttributeError.
        # self._driver = GraphDatabase.driver(uri, auth=(user, password))
        self._driver: Optional[Any] = None
        logger.info("知识图谱引擎初始化: %s", uri)

    async def query_subject_graph(
        self, subject: str, grade: Optional[str] = None
    ) -> Tuple[List[KnowledgeNode], List[KnowledgeEdge]]:
        """Return the knowledge graph structure of one subject.

        Args:
            subject: Subject name.
            grade: Optional grade filter.

        Returns:
            ``(nodes, edges)``. Both lists are empty while the driver calls
            below remain disabled.
        """
        logger.info("查询知识图谱: subject=%s, grade=%s", subject, grade)

        nodes: List[KnowledgeNode] = []
        edges: List[KnowledgeEdge] = []

        # Reference implementation (driver not wired up yet):
        # async with self._driver.async_session() as session:
        #     result = await session.run(
        #         self._NODE_QUERY, subject=subject, grade=grade
        #     )
        #     async for record in result:
        #         nodes.append(KnowledgeNode(
        #             node_id=record["id"],
        #             name=record["name"],
        #             ...
        #         ))
        #
        #     result = await session.run(
        #         self._EDGE_QUERY, subject=subject, grade=grade
        #     )
        #     async for record in result:
        #         edges.append(KnowledgeEdge(
        #             source_id=record["source"],
        #             target_id=record["target"],
        #             relation_type=record["relation"],
        #             weight=record["weight"] or 1.0,
        #         ))

        logger.info(
            "图谱查询完成: %d节点, %d边", len(nodes), len(edges)
        )
        return nodes, edges

    @staticmethod
    def _build_prerequisite_query(depth: int) -> str:
        """Build the prerequisite-chain Cypher for a fixed traversal depth.

        Cypher does not accept a parameter as the bound of a variable-length
        pattern, so the depth is coerced to an int (which also rules out
        injection) and written into the query text. The knowledge-point id
        stays a regular ``$kid`` parameter.
        """
        depth = max(1, int(depth))
        return (
            "MATCH path = (target:KnowledgePoint {id: $kid})\n"
            f"      <-[:PREREQUISITE*1..{depth}]-(prereq:KnowledgePoint)\n"
            "RETURN prereq.id AS id, prereq.name AS name,\n"
            "       prereq.subject AS subject, prereq.grade AS grade,\n"
            "       prereq.chapter AS chapter, prereq.difficulty AS difficulty,\n"
            "       length(path) AS distance\n"
            "ORDER BY distance ASC\n"
        )

    async def query_prerequisites(
        self, knowledge_id: str, max_depth: int = 3
    ) -> List[KnowledgeNode]:
        """Return the prerequisite chain of a knowledge point.

        Used for error attribution: when a knowledge point is not mastered,
        its prerequisites are checked to find the underlying cause.

        Args:
            knowledge_id: Id of the target knowledge point.
            max_depth: Maximum traversal depth (values below 1 are treated
                as 1).

        Returns:
            Prerequisite nodes, nearest first. Empty while the driver call
            below remains disabled.
        """
        # Consumed by the session.run call below once it is enabled.
        query = self._build_prerequisite_query(max_depth)

        prerequisites: List[KnowledgeNode] = []
        # Reference implementation (driver not wired up yet):
        # async with self._driver.async_session() as session:
        #     result = await session.run(query, kid=knowledge_id)
        #     async for record in result:
        #         prerequisites.append(KnowledgeNode(
        #             node_id=record["id"],
        #             name=record["name"],
        #             ...
        #         ))

        logger.debug(
            "知识点 %s 的前驱链: %d个",
            knowledge_id,
            len(prerequisites),
        )
        return prerequisites

    async def _query_question_knowledge(self, question_id: str) -> List[str]:
        """Return the ids of the knowledge points linked to a question.

        Placeholder hook: no lookup is implemented yet, so it returns an
        empty list.
        """
        return []

    async def _get_knowledge_node(
        self, knowledge_id: str
    ) -> Optional[KnowledgeNode]:
        """Return the node for ``knowledge_id``.

        Placeholder hook: no lookup is implemented yet, so it returns None.
        """
        return None

    async def attribute_errors(
        self,
        student_id: str,
        error_question_ids: List[str],
        mastery_map: Dict[str, float],
    ) -> List[ErrorAttribution]:
        """Attribute each wrong answer to unmastered knowledge points.

        For every question:
          1. Look up the knowledge points linked to the question.
          2. For each of them that is not mastered, fetch its prerequisites.
          3. Every prerequisite that is also not mastered is a root cause.

        Args:
            student_id: Student id (used for logging only).
            error_question_ids: Ids of the wrongly answered questions.
            mastery_map: ``{knowledge_id: mastery_level}``; missing ids are
                treated as mastery 0.0.

        Returns:
            One ``ErrorAttribution`` per question, in input order.
        """
        logger.info(
            "错题归因: student=%s, 错题数=%d",
            student_id,
            len(error_question_ids),
        )

        threshold = self.MASTERY_THRESHOLD
        # Several questions often share knowledge points; fetch each
        # prerequisite chain only once per call.
        prereq_cache: Dict[str, List[KnowledgeNode]] = {}
        attributions: List[ErrorAttribution] = []

        for question_id in error_question_ids:
            question_kps = list(
                await self._query_question_knowledge(question_id)
            )

            root_causes: List[str] = []
            seen_causes = set()

            for kp_id in question_kps:
                if mastery_map.get(kp_id, 0.0) >= threshold:
                    continue  # mastered: nothing to trace back

                if kp_id not in prereq_cache:
                    prereq_cache[kp_id] = await self.query_prerequisites(kp_id)

                for prereq in prereq_cache[kp_id]:
                    prereq_id = prereq.node_id
                    if prereq_id in seen_causes:
                        continue
                    if mastery_map.get(prereq_id, 0.0) < threshold:
                        seen_causes.add(prereq_id)
                        root_causes.append(prereq_id)

            suggestion = self._generate_suggestion(
                question_kps, root_causes, mastery_map
            )
            attributions.append(ErrorAttribution(
                question_id=question_id,
                error_knowledge_ids=question_kps,
                root_cause_ids=root_causes,
                suggestion=suggestion,
            ))

        return attributions

    def _generate_suggestion(
        self,
        knowledge_ids: List[str],
        root_cause_ids: List[str],
        mastery_map: Dict[str, float],
    ) -> str:
        """Turn an attribution result into a study suggestion.

        Root causes take priority. Otherwise the suggestion depends on the
        average mastery of the directly linked knowledge points. With
        neither, a generic "no suggestion" text is returned.
        """
        if root_cause_ids:
            return (
                f"建议先复习前驱知识点(共{len(root_cause_ids)}个),"
                f"夯实基础后再针对性练习当前知识点"
            )

        if not knowledge_ids:
            return "暂无具体建议"

        avg_mastery = sum(
            mastery_map.get(k, 0) for k in knowledge_ids
        ) / len(knowledge_ids)

        if avg_mastery < self.LOW_MASTERY_THRESHOLD:
            return "该知识点掌握度较低,建议从基础概念开始系统学习"
        if avg_mastery < self.MASTERY_THRESHOLD:
            return "该知识点已有一定基础,建议加强专项练习巩固提升"
        return "知识点掌握较好,本次错误可能是粗心或审题不清"

    async def recommend_learning_path(
        self,
        student_id: str,
        target_knowledge_id: str,
        mastery_map: Dict[str, float],
    ) -> List[KnowledgeNode]:
        """Recommend the knowledge points to study before a target.

        Rules:
          1. Only prerequisites that are not mastered are included.
          2. They are ordered by ascending difficulty.
          3. A prerequisite reachable through several paths appears once.

        Args:
            student_id: Student id (used for logging only).
            target_knowledge_id: Id of the knowledge point to reach.
            mastery_map: ``{knowledge_id: mastery_level}``; missing ids are
                treated as mastery 0.0.

        Returns:
            The recommended nodes in study order. The target node is
            appended when ``_get_knowledge_node`` can resolve it.
        """
        all_prereqs = await self.query_prerequisites(
            target_knowledge_id, max_depth=5
        )

        path: List[KnowledgeNode] = []
        seen_ids = set()
        for node in all_prereqs:
            # The path query yields one row per path, so the same node can
            # show up more than once.
            if node.node_id in seen_ids:
                continue
            seen_ids.add(node.node_id)
            if mastery_map.get(node.node_id, 0.0) < self.MASTERY_THRESHOLD:
                path.append(node)

        path.sort(key=lambda n: n.difficulty)

        target_node = await self._get_knowledge_node(target_knowledge_id)
        if target_node is not None:
            path.append(target_node)

        logger.info(
            "学习路径推荐: student=%s, target=%s, 路径长度=%d",
            student_id,
            target_knowledge_id,
            len(path),
        )
        return path

    async def aggregate_chapter_mastery(
        self,
        student_id: str,
        subject: str,
        mastery_map: Dict[str, float],
    ) -> List[Dict[str, Any]]:
        """Aggregate knowledge-point mastery per chapter.

        Args:
            student_id: Student id (not used by the computation).
            subject: Subject whose graph is aggregated.
            mastery_map: ``{knowledge_id: mastery_level}``; missing ids are
                treated as mastery 0.0.

        Returns:
            One dict per chapter, sorted by chapter name, with the keys
            ``chapter``, ``avg_mastery``, ``knowledge_count`` and
            ``mastered_count``. Nodes without a chapter are grouped under
            a catch-all bucket.
        """
        nodes, _ = await self.query_subject_graph(subject)

        by_chapter: Dict[str, List[float]] = {}
        for node in nodes:
            chapter = node.chapter or "其他"
            by_chapter.setdefault(chapter, []).append(
                mastery_map.get(node.node_id, 0.0)
            )

        result = [
            {
                "chapter": chapter,
                "avg_mastery": round(sum(levels) / len(levels), 3),
                "knowledge_count": len(levels),
                "mastered_count": sum(
                    1 for m in levels if m >= self.MASTERY_THRESHOLD
                ),
            }
            for chapter, levels in by_chapter.items()
        ]
        result.sort(key=lambda row: row["chapter"])
        return result

    async def close(self) -> None:
        """Close the Neo4j connection (the driver call is still disabled)."""
        # await self._driver.close()
        logger.info("知识图谱引擎已关闭")
|
||||
@@ -0,0 +1,541 @@
|
||||
# 自然写教学数据分析与学情诊断系统软件 V1.0
|
||||
# analytics/student_profiler.py - 学生画像分析引擎
|
||||
|
||||
import logging
|
||||
import math
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from datetime import datetime, date, timedelta
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
# Module-level logger for the student-profile analytics module.
logger = logging.getLogger("writech.analytics.profiler")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 画像分析数据模型
|
||||
# ============================================================
|
||||
|
||||
@dataclass
class ScoreTrend:
    """One data point of a score trend."""

    date: str
    score: float
    subject: str
    # One of: homework / exam / practice.
    exam_type: str = ""
|
||||
|
||||
|
||||
@dataclass
class SubjectAbility:
    """Ability assessment for one subject."""

    subject: str
    overall_score: float = 0.0
    # Knowledge-point coverage.
    knowledge_coverage: float = 0.0
    # Practice frequency (times per week).
    practice_frequency: float = 0.0
    # Rate of improvement.
    improvement_rate: float = 0.0
    # Stability (described upstream as the reciprocal of score variance).
    stability: float = 0.0
|
||||
|
||||
|
||||
@dataclass
class LearningHabit:
    """Learning-habit profile."""

    avg_daily_minutes: float = 0.0
    # Peak study hour of the day.
    peak_study_hour: int = 0
    # Study duration for Monday through Sunday.
    weekly_pattern: List[float] = field(default_factory=list)
    # Regularity score of the study routine.
    consistency_score: float = 0.0
    # Share of homework submitted on time.
    homework_timeliness: float = 0.0
|
||||
|
||||
|
||||
@dataclass
class WritingAbility:
    """Handwriting ability assessment."""

    # Stroke-order accuracy.
    stroke_order_accuracy: float = 0.0
    # Handwriting conformity/quality.
    writing_quality: float = 0.0
    # Writing speed (characters per minute).
    writing_speed: float = 0.0
    # Character structure score.
    char_structure_score: float = 0.0
    # Improvement trend label.
    improvement_trend: str = "stable"
|
||||
|
||||
|
||||
@dataclass
class ComprehensiveProfile:
    """Comprehensive learning profile of one student."""

    # Identity (all required).
    student_id: str
    student_name: str
    class_id: str
    grade: str
    school_id: str

    # Overall score and rankings.
    overall_score: float = 0.0
    rank_in_class: int = 0
    rank_in_grade: int = 0
    percentile: float = 0.0

    # Per-subject abilities.
    subject_abilities: List[SubjectAbility] = field(default_factory=list)

    # Learning habit.
    learning_habit: Optional[LearningHabit] = None

    # Handwriting ability.
    writing_ability: Optional[WritingAbility] = None

    # Score trend data points.
    score_trends: List[ScoreTrend] = field(default_factory=list)

    # Time at which the analysis was produced.
    analyzed_at: str = ""
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 画像分析引擎
|
||||
# ============================================================
|
||||
|
||||
class StudentProfiler:
    """Student profile analysis engine.

    Features:
      1. Overall score calculation.
      2. Multi-dimensional ability assessment per subject.
      3. Learning-habit analysis.
      4. Handwriting ability assessment.
      5. Score trend analysis.
      6. Class/grade ranking.

    NOTE: the data-access methods (``_fetch_*``, ``_calculate_rankings``,
    ``_save_profile``) are placeholders; their ClickHouse calls are kept as
    commented-out reference code.
    """

    # Dimension weights used for the overall score.
    WEIGHT_HOMEWORK_SCORE = 0.30  # homework score
    WEIGHT_EXAM_SCORE = 0.35      # exam score
    WEIGHT_PRACTICE = 0.15        # practice performance
    WEIGHT_WRITING = 0.10         # handwriting ability
    WEIGHT_HABIT = 0.10           # learning habit

    # Grading thresholds.
    EXCELLENT_THRESHOLD = 90.0
    GOOD_THRESHOLD = 75.0
    PASS_THRESHOLD = 60.0

    # Minimum change between the earlier and later half of the writing
    # quality scores that is reported as "improving" / "declining".
    _WRITING_TREND_DELTA = 3

    def __init__(self):
        """Initialise the profile analysis engine."""
        logger.info("学生画像分析引擎初始化")

    # ------------------------------------------------------------------
    # Pure helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _normalize_score(score: Any, total: Any) -> Optional[float]:
        """Convert ``score`` out of ``total`` to a percentage.

        Returns None when either value is missing or non-numeric, or when
        ``total`` is not positive, so callers can skip the record instead of
        producing a meaningless percentage.
        """
        try:
            score_value = float(score)
            total_value = float(total)
        except (TypeError, ValueError):
            return None
        if total_value <= 0:
            return None
        return (score_value / total_value) * 100

    @staticmethod
    def _record_day(value: Any) -> str:
        """Return the ``YYYY-MM-DD`` prefix of a date-like value.

        Accepts ``date``/``datetime`` objects as well as strings; None
        becomes an empty string.
        """
        if value is None:
            return ""
        if isinstance(value, date):  # datetime is a subclass of date
            return value.isoformat()[:10]
        return str(value)[:10]

    @staticmethod
    def _half_delta(values: List[float]) -> float:
        """Return mean(later half) - mean(earlier half).

        The earlier half holds ``len(values) // 2`` items. Fewer than two
        values yields 0.0.
        """
        mid = len(values) // 2
        if mid == 0:
            return 0.0
        early_avg = sum(values[:mid]) / mid
        recent_avg = sum(values[mid:]) / (len(values) - mid)
        return recent_avg - early_avg

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------

    async def build_profile(
        self,
        student_id: str,
        student_info: Dict[str, Any],
        period_days: int = 30,
    ) -> ComprehensiveProfile:
        """Build the comprehensive profile of one student.

        Args:
            student_id: Student id.
            student_info: Basic student information; the keys ``name``,
                ``class_id``, ``grade`` and ``school_id`` are read, each
                defaulting to an empty string.
            period_days: Length of the analysis period in days, ending
                today.

        Returns:
            The profile, after it has been passed to ``_save_profile``.
        """
        logger.info(
            "构建学生画像: %s, 分析周期=%d天", student_id, period_days
        )

        end_date = date.today()
        start_date = end_date - timedelta(days=period_days)
        class_id = student_info.get("class_id", "")
        grade = student_info.get("grade", "")

        # 1. Fetch the raw data.
        homework_data = await self._fetch_homework_data(
            student_id, start_date, end_date
        )
        exam_data = await self._fetch_exam_data(
            student_id, start_date, end_date
        )
        practice_data = await self._fetch_practice_data(
            student_id, start_date, end_date
        )
        writing_data = await self._fetch_writing_data(
            student_id, start_date, end_date
        )
        usage_data = await self._fetch_usage_data(
            student_id, start_date, end_date
        )

        # 2. Analyse each dimension.
        subject_abilities = self._analyze_subject_abilities(
            homework_data, exam_data, practice_data
        )
        learning_habit = self._analyze_learning_habit(usage_data)
        writing_ability = self._analyze_writing_ability(writing_data)
        score_trends = self._analyze_score_trends(homework_data, exam_data)

        # 3. Overall score.
        overall_score = self._calculate_overall_score(
            subject_abilities, learning_habit, writing_ability
        )

        # 4. Rankings.
        rank_in_class, rank_in_grade, percentile = (
            await self._calculate_rankings(
                student_id, class_id, grade, overall_score
            )
        )

        profile = ComprehensiveProfile(
            student_id=student_id,
            student_name=student_info.get("name", ""),
            class_id=class_id,
            grade=grade,
            school_id=student_info.get("school_id", ""),
            overall_score=round(overall_score, 1),
            rank_in_class=rank_in_class,
            rank_in_grade=rank_in_grade,
            percentile=round(percentile, 1),
            subject_abilities=subject_abilities,
            learning_habit=learning_habit,
            writing_ability=writing_ability,
            score_trends=score_trends,
            analyzed_at=datetime.now().isoformat(),
        )

        # 5. Persist to the ClickHouse profile table.
        await self._save_profile(profile)

        logger.info(
            "画像构建完成: %s, 综合评分=%.1f, 班级排名=%d",
            student_id, overall_score, rank_in_class,
        )
        return profile

    # ------------------------------------------------------------------
    # Data access (placeholders)
    # ------------------------------------------------------------------

    async def _fetch_homework_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch homework scores from ClickHouse (placeholder: returns [])."""
        # Reference query:
        # query = """
        #     SELECT subject, score, total_score, submitted_at, is_on_time
        #     FROM homework_submissions
        #     WHERE student_id = %(sid)s
        #       AND submitted_at BETWEEN %(start)s AND %(end)s
        #     ORDER BY submitted_at
        # """
        # return await clickhouse_query(query, {
        #     "sid": student_id, "start": str(start), "end": str(end)
        # })
        return []

    async def _fetch_exam_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch exam scores from ClickHouse (placeholder: returns [])."""
        return []

    async def _fetch_practice_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch copybook/stroke-order practice data (placeholder: returns [])."""
        return []

    async def _fetch_writing_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch handwriting quality scores (placeholder: returns [])."""
        return []

    async def _fetch_usage_data(
        self, student_id: str, start: date, end: date
    ) -> List[Dict[str, Any]]:
        """Fetch application usage durations (placeholder: returns [])."""
        return []

    # ------------------------------------------------------------------
    # Analysis
    # ------------------------------------------------------------------

    def _analyze_subject_abilities(
        self,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
        practice_data: List[Dict[str, Any]],
    ) -> List[SubjectAbility]:
        """Assess ability per subject from homework and exam scores.

        Computed per subject:
          - ``overall_score``: mean percentage score.
          - ``stability``: ``1 / (1 + std_dev / 10)``.
          - ``improvement_rate``: mean of the later half minus mean of the
            earlier half, with scores ordered by record date
            (``submitted_at`` for homework, ``exam_date`` for exams).

        Records whose score cannot be normalised are skipped.
        ``practice_data`` is accepted but not used by the current
        computation, so coverage and practice frequency keep their defaults.
        """
        dated_scores: Dict[str, List[Tuple[str, float]]] = {}
        sources = (
            (homework_data, "submitted_at"),
            (exam_data, "exam_date"),
        )
        for records, date_key in sources:
            for record in records:
                percent = self._normalize_score(
                    record.get("score", 0), record.get("total_score", 100)
                )
                if percent is None:
                    continue
                subject = record.get("subject", "unknown")
                day = self._record_day(record.get(date_key))
                dated_scores.setdefault(subject, []).append((day, percent))

        abilities: List[SubjectAbility] = []
        for subject, entries in dated_scores.items():
            # Chronological order is required for the half-vs-half
            # comparison. The sort is stable, so undated records keep their
            # input order.
            entries.sort(key=lambda entry: entry[0])
            scores = [percent for _, percent in entries]

            avg_score = sum(scores) / len(scores)
            variance = sum((s - avg_score) ** 2 for s in scores) / len(scores)
            stability = 1.0 / (1.0 + math.sqrt(variance) / 10)
            improvement = self._half_delta(scores)

            abilities.append(SubjectAbility(
                subject=subject,
                overall_score=round(avg_score, 1),
                stability=round(stability, 3),
                improvement_rate=round(improvement, 1),
            ))

        return abilities

    def _analyze_learning_habit(
        self, usage_data: List[Dict[str, Any]]
    ) -> LearningHabit:
        """Analyse learning habits from usage records.

        Each record is read through the keys ``date``, ``duration_minutes``
        and ``start_hour``. Computed values:
          - average minutes per active day,
          - the start hour that occurs most often,
          - weekly pattern, Monday through Sunday,
          - consistency: ``max(0, 1 - coefficient_of_variation)`` of the
            daily totals.

        Returns a default ``LearningHabit`` when there is no usage data.
        """
        if not usage_data:
            return LearningHabit()

        daily_minutes: Dict[str, float] = {}
        hourly_counts: Dict[int, int] = {}
        for record in usage_data:
            day = self._record_day(record.get("date", ""))
            minutes = record.get("duration_minutes", 0) or 0
            hour = record.get("start_hour", 0)
            daily_minutes[day] = daily_minutes.get(day, 0) + minutes
            hourly_counts[hour] = hourly_counts.get(hour, 0) + 1

        totals = list(daily_minutes.values())
        avg_daily = sum(totals) / len(totals)

        peak_hour = max(hourly_counts, key=hourly_counts.get, default=0)

        # NOTE(review): the weekly pattern is taken to be the average daily
        # total per weekday - confirm with the report consumers.
        weekday_minutes: Dict[int, List[float]] = {i: [] for i in range(7)}
        for day, total in daily_minutes.items():
            try:
                weekday = date.fromisoformat(day).weekday()
            except ValueError:
                continue  # missing or malformed date: leave it out
            weekday_minutes[weekday].append(total)
        weekly_pattern = [
            round(sum(values) / len(values), 1) if values else 0.0
            for values in (weekday_minutes[i] for i in range(7))
        ]

        # The smaller the coefficient of variation, the more regular the
        # routine.
        variance = sum((v - avg_daily) ** 2 for v in totals) / len(totals)
        cv = math.sqrt(variance) / max(avg_daily, 1)
        consistency = max(0.0, 1.0 - cv)

        return LearningHabit(
            avg_daily_minutes=round(avg_daily, 1),
            peak_study_hour=peak_hour,
            weekly_pattern=weekly_pattern,
            consistency_score=round(consistency, 3),
        )

    def _analyze_writing_ability(
        self, writing_data: List[Dict[str, Any]]
    ) -> WritingAbility:
        """Assess handwriting ability.

        Averages the ``stroke_order_score``, ``quality_score``, ``speed``
        and ``structure_score`` of the records. The trend compares the mean
        quality score of the later half with that of the earlier half.

        Returns a default ``WritingAbility`` when there is no data.
        """
        if not writing_data:
            return WritingAbility()

        count = len(writing_data)

        def mean_of(key: str) -> float:
            return sum(d.get(key, 0) for d in writing_data) / count

        quality_scores = [d.get("quality_score", 0) for d in writing_data]
        delta = self._half_delta(quality_scores)
        if delta > self._WRITING_TREND_DELTA:
            trend = "improving"
        elif delta < -self._WRITING_TREND_DELTA:
            trend = "declining"
        else:
            trend = "stable"

        return WritingAbility(
            stroke_order_accuracy=round(mean_of("stroke_order_score"), 1),
            writing_quality=round(sum(quality_scores) / count, 1),
            writing_speed=round(mean_of("speed"), 1),
            char_structure_score=round(mean_of("structure_score"), 1),
            improvement_trend=trend,
        )

    def _analyze_score_trends(
        self,
        homework_data: List[Dict[str, Any]],
        exam_data: List[Dict[str, Any]],
    ) -> List[ScoreTrend]:
        """Build the date-ordered list of score trend points.

        Homework records are dated by ``submitted_at`` and exam records by
        ``exam_date``. Records whose score cannot be normalised are skipped.
        """
        trends: List[ScoreTrend] = []
        sources = (
            (homework_data, "submitted_at", "homework"),
            (exam_data, "exam_date", "exam"),
        )
        for records, date_key, kind in sources:
            for record in records:
                percent = self._normalize_score(
                    record.get("score", 0), record.get("total_score", 100)
                )
                if percent is None:
                    continue
                trends.append(ScoreTrend(
                    date=self._record_day(record.get(date_key)),
                    score=round(percent, 1),
                    subject=record.get("subject", ""),
                    exam_type=kind,
                ))

        trends.sort(key=lambda t: t.date)
        return trends

    def _calculate_overall_score(
        self,
        subject_abilities: List[SubjectAbility],
        learning_habit: LearningHabit,
        writing_ability: WritingAbility,
    ) -> float:
        """Compute the overall score on a 0-100 scale.

        Weighted formula:
            overall = homework x 0.30 + exam x 0.35 + practice x 0.15
                      + writing x 0.10 + habit x 0.10

        The homework, exam and practice terms all use the mean
        ``overall_score`` across subjects; no separate practice score is
        computed here. The result is clamped to [0, 100].
        """
        if subject_abilities:
            academic_avg = sum(
                a.overall_score for a in subject_abilities
            ) / len(subject_abilities)
        else:
            academic_avg = 0.0

        # Writing quality is used as-is.
        writing_score = writing_ability.writing_quality
        # Consistency is scaled by 100.
        habit_score = learning_habit.consistency_score * 100

        overall = (
            academic_avg * (self.WEIGHT_HOMEWORK_SCORE + self.WEIGHT_EXAM_SCORE)
            + academic_avg * self.WEIGHT_PRACTICE
            + writing_score * self.WEIGHT_WRITING
            + habit_score * self.WEIGHT_HABIT
        )
        return min(100.0, max(0.0, overall))

    async def _calculate_rankings(
        self,
        student_id: str,
        class_id: str,
        grade: str,
        score: float,
    ) -> Tuple[int, int, float]:
        """Compute class rank, grade rank and grade percentile.

        Placeholder: the ClickHouse lookups are not implemented, so it
        returns ``(0, 0, 0.0)``.
        """
        # Reference implementation:
        # class_scores = await query_class_scores(class_id)
        # class_rank = sum(1 for s in class_scores if s > score) + 1
        #
        # grade_scores = await query_grade_scores(grade)
        # grade_rank = sum(1 for s in grade_scores if s > score) + 1
        # percentile = (1 - grade_rank / max(len(grade_scores), 1)) * 100
        return 0, 0, 0.0

    async def _save_profile(self, profile: ComprehensiveProfile) -> None:
        """Write the profile to the ClickHouse profile table (placeholder)."""
        # clickhouse_client.execute(
        #     "INSERT INTO student_profile VALUES",
        #     [profile_to_row(profile)],
        # )
        pass
|
||||
@@ -0,0 +1,460 @@
|
||||
# 自然写教学数据分析与学情诊断系统软件 V1.0
|
||||
# analytics/writing_growth.py - 书写能力成长评测引擎
|
||||
|
||||
import logging
|
||||
import math
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from datetime import datetime, date, timedelta
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
# Module-level logger for the writing-growth analytics module.
logger = logging.getLogger("writech.analytics.writing_growth")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 书写成长数据模型
|
||||
# ============================================================
|
||||
|
||||
@dataclass
class WritingSnapshot:
    """Handwriting ability for one time slice."""

    date: str

    # Scores of the four assessed dimensions.
    stroke_order_accuracy: float = 0.0
    writing_quality: float = 0.0
    writing_speed: float = 0.0
    char_structure: float = 0.0

    # Practice volume within the slice.
    practice_count: int = 0
    total_chars: int = 0
|
||||
|
||||
|
||||
@dataclass
class CharacterProgress:
    """Progress record for one handwritten character."""

    character: str
    first_score: float
    latest_score: float
    best_score: float
    practice_count: int
    # latest_score - first_score.
    improvement: float
    # One of: beginner / intermediate / advanced / master.
    mastery_level: str
|
||||
|
||||
|
||||
@dataclass
class WritingGrowthReport:
    """Handwriting growth assessment report."""

    student_id: str
    period_start: str
    period_end: str

    # Overall rating. The level is one of the five labels listed in the
    # analyzer's level thresholds.
    overall_level: str = ""
    overall_score: float = 0.0
    overall_trend: str = "stable"

    # Score and trend per dimension.
    stroke_order_score: float = 0.0
    stroke_order_trend: str = "stable"
    quality_score: float = 0.0
    quality_trend: str = "stable"
    speed_score: float = 0.0
    speed_trend: str = "stable"
    structure_score: float = 0.0
    structure_trend: str = "stable"

    # Time series.
    snapshots: List[WritingSnapshot] = field(default_factory=list)

    # Per-character progress rankings.
    most_improved_chars: List[CharacterProgress] = field(
        default_factory=list
    )
    needs_practice_chars: List[CharacterProgress] = field(
        default_factory=list
    )

    # Practice statistics.
    total_practice_sessions: int = 0
    total_characters_written: int = 0
    avg_daily_practice_minutes: float = 0.0

    # Time at which the report was generated.
    analyzed_at: str = ""
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 书写成长评测引擎
|
||||
# ============================================================
|
||||
|
||||
class WritingGrowthAnalyzer:
|
||||
"""
|
||||
书写能力成长评测引擎
|
||||
|
||||
功能:
|
||||
1. 多维度书写能力评分(笔顺、规范性、速度、结构)
|
||||
2. 成长趋势分析(移动平均法平滑噪声)
|
||||
3. 单字进步追踪
|
||||
4. 书写等级评定
|
||||
5. 书写问题诊断
|
||||
"""
|
||||
|
||||
# 书写等级评定标准
|
||||
LEVEL_THRESHOLDS = {
|
||||
"精通": 95.0,
|
||||
"优秀": 85.0,
|
||||
"进阶": 70.0,
|
||||
"入门": 50.0,
|
||||
"初学": 0.0,
|
||||
}
|
||||
|
||||
# 各维度权重
|
||||
WEIGHTS = {
|
||||
"stroke_order": 0.25,
|
||||
"quality": 0.35,
|
||||
"speed": 0.15,
|
||||
"structure": 0.25,
|
||||
}
|
||||
|
||||
def __init__(self):
    """Initialise the writing-growth analyzer; it only logs its start-up."""
    logger.info("书写成长评测引擎初始化")
|
||||
|
||||
async def analyze_growth(
    self,
    student_id: str,
    start_date: str,
    end_date: str,
    granularity: str = "weekly",
) -> WritingGrowthReport:
    """Analyse how a student's handwriting ability developed over a period.

    Args:
        student_id: Student id.
        start_date: First day of the period, ISO format.
        end_date: Last day of the period, ISO format.
        granularity: Time granularity (daily/weekly/monthly).

    Returns:
        The writing growth report.

    Raises:
        ValueError: If ``start_date`` or ``end_date`` is not a valid ISO
            date. This is raised before any data is fetched.
    """
    logger.info(
        "书写成长分析: student=%s, %s~%s, 粒度=%s",
        student_id, start_date, end_date, granularity,
    )

    # Parse the bounds first so malformed input fails before any query runs.
    period_span = (
        datetime.fromisoformat(end_date) - datetime.fromisoformat(start_date)
    )
    # The period is inclusive of both bounds (the reference queries use
    # BETWEEN), so a start..end span covers `days + 1` calendar days.
    period_days = max(period_span.days + 1, 1)

    # 1. Fetch the raw handwriting scores.
    raw_data = await self._fetch_writing_scores(
        student_id, start_date, end_date
    )

    # 2. Aggregate by the requested granularity.
    snapshots = self._aggregate_by_period(raw_data, granularity)

    # 3. Score and trend per dimension.
    stroke_score, stroke_trend = self._calc_dimension_trend(
        [snap.stroke_order_accuracy for snap in snapshots]
    )
    quality_score, quality_trend = self._calc_dimension_trend(
        [snap.writing_quality for snap in snapshots]
    )
    speed_score, speed_trend = self._calc_dimension_trend(
        [snap.writing_speed for snap in snapshots]
    )
    structure_score, structure_trend = self._calc_dimension_trend(
        [snap.char_structure for snap in snapshots]
    )

    # 4. Overall score, level and trend.
    overall_score = self._calc_overall_score(
        stroke_score, quality_score, speed_score, structure_score
    )
    overall_level = self._determine_level(overall_score)
    overall_trend = self._determine_overall_trend(snapshots)

    # 5. Per-character progress.
    char_data = await self._fetch_character_scores(
        student_id, start_date, end_date
    )
    most_improved, needs_practice = self._analyze_char_progress(char_data)

    # 6. Practice statistics.
    total_sessions = sum(snap.practice_count for snap in snapshots)
    total_chars = sum(snap.total_chars for snap in snapshots)
    # Estimate of practice time: half a minute per written character.
    minutes_per_char = 0.5
    avg_daily = total_chars / period_days * minutes_per_char

    return WritingGrowthReport(
        student_id=student_id,
        period_start=start_date,
        period_end=end_date,
        overall_level=overall_level,
        overall_score=round(overall_score, 1),
        overall_trend=overall_trend,
        stroke_order_score=round(stroke_score, 1),
        stroke_order_trend=stroke_trend,
        quality_score=round(quality_score, 1),
        quality_trend=quality_trend,
        speed_score=round(speed_score, 1),
        speed_trend=speed_trend,
        structure_score=round(structure_score, 1),
        structure_trend=structure_trend,
        snapshots=snapshots,
        # Only the first ten entries of each ranking are reported.
        most_improved_chars=most_improved[:10],
        needs_practice_chars=needs_practice[:10],
        total_practice_sessions=total_sessions,
        total_characters_written=total_chars,
        avg_daily_practice_minutes=round(avg_daily, 1),
        analyzed_at=datetime.now().isoformat(),
    )
|
||||
|
||||
async def _fetch_writing_scores(
|
||||
self, student_id: str, start: str, end: str
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""从ClickHouse获取书写评分原始数据"""
|
||||
# query = """
|
||||
# SELECT date, stroke_order_accuracy, writing_quality,
|
||||
# writing_speed, char_structure, practice_count, total_chars
|
||||
# FROM writing_growth
|
||||
# WHERE student_id = %(sid)s
|
||||
# AND date BETWEEN %(start)s AND %(end)s
|
||||
# ORDER BY date
|
||||
# """
|
||||
return []
|
||||
|
||||
async def _fetch_character_scores(
|
||||
self, student_id: str, start: str, end: str
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""获取单字练习评分数据"""
|
||||
# query = """
|
||||
# SELECT character, score, practice_at
|
||||
# FROM practice_records
|
||||
# WHERE student_id = %(sid)s
|
||||
# AND practice_at BETWEEN %(start)s AND %(end)s
|
||||
# ORDER BY character, practice_at
|
||||
# """
|
||||
return []
|
||||
|
||||
def _aggregate_by_period(
    self,
    raw_data: List[Dict[str, Any]],
    granularity: str,
) -> List[WritingSnapshot]:
    """
    Roll raw score rows up into one snapshot per time bucket.

    Bucket keys are derived from each row's ``"date"`` value:

    * ``"weekly"``  - ISO date of the Monday of that row's week;
    * ``"monthly"`` - the first seven characters of the date string
      (expected to be ``YYYY-MM``);
    * anything else - the date string itself.

    Within a bucket the four score fields are averaged, while
    ``practice_count`` and ``total_chars`` are summed. A field missing
    from a row counts as 0.

    Args:
        raw_data: Row dicts to aggregate.
        granularity: Bucketing mode, see above.

    Returns:
        Snapshots ordered by ascending bucket key; an empty list when
        ``raw_data`` is empty.
    """
    if not raw_data:
        return []

    def bucket_key(day: str) -> str:
        # Map one row's date string to the key of the bucket it joins.
        if granularity == "weekly":
            parsed = datetime.fromisoformat(day)
            # weekday() is 0 on Monday, so this steps back to the Monday.
            monday = parsed - timedelta(days=parsed.weekday())
            return monday.date().isoformat()
        if granularity == "monthly":
            return day[:7]
        return day

    def field_total(rows: List[Dict[str, Any]], name: str):
        # Sum one field over a bucket, treating absent values as 0.
        return sum(row.get(name, 0) for row in rows)

    # Group the rows by bucket key.
    buckets: Dict[str, List[Dict[str, Any]]] = {}
    for row in raw_data:
        key = bucket_key(row.get("date", ""))
        if key not in buckets:
            buckets[key] = []
        buckets[key].append(row)

    # Build one snapshot per bucket, in ascending key order.
    result: List[WritingSnapshot] = []
    for key in sorted(buckets):
        rows = buckets[key]
        size = len(rows)
        result.append(
            WritingSnapshot(
                date=key,
                stroke_order_accuracy=(
                    field_total(rows, "stroke_order_accuracy") / size
                ),
                writing_quality=field_total(rows, "writing_quality") / size,
                writing_speed=field_total(rows, "writing_speed") / size,
                char_structure=field_total(rows, "char_structure") / size,
                practice_count=field_total(rows, "practice_count"),
                total_chars=field_total(rows, "total_chars"),
            )
        )
    return result
|
||||
|
||||
def _calc_dimension_trend(
|
||||
self, values: List[float]
|
||||
) -> Tuple[float, str]:
|
||||
"""
|
||||
计算某维度的当前评分和趋势
|
||||
|
||||
使用指数移动平均(EMA)平滑数据噪声,
|
||||
对比最近EMA与早期EMA判断趋势。
|
||||
"""
|
||||
if not values:
|
||||
return 0.0, "stable"
|
||||
|
||||
# 指数移动平均(衰减因子0.3)
|
||||
alpha = 0.3
|
||||
ema_values = [values[0]]
|
||||
for i in range(1, len(values)):
|
||||
ema = alpha * values[i] + (1 - alpha) * ema_values[-1]
|
||||
ema_values.append(ema)
|
||||
|
||||
current_score = ema_values[-1]
|
||||
|
||||
# 趋势判断:对比前半段和后半段的EMA均值
|
||||
if len(ema_values) >= 4:
|
||||
mid = len(ema_values) // 2
|
||||
early_avg = sum(ema_values[:mid]) / mid
|
||||
recent_avg = sum(ema_values[mid:]) / (len(ema_values) - mid)
|
||||
diff = recent_avg - early_avg
|
||||
|
||||
if diff > 3:
|
||||
trend = "improving"
|
||||
elif diff < -3:
|
||||
trend = "declining"
|
||||
else:
|
||||
trend = "stable"
|
||||
else:
|
||||
trend = "stable"
|
||||
|
||||
return current_score, trend
|
||||
|
||||
def _calc_overall_score(
|
||||
self,
|
||||
stroke: float,
|
||||
quality: float,
|
||||
speed: float,
|
||||
structure: float,
|
||||
) -> float:
|
||||
"""加权计算综合书写评分"""
|
||||
return (
|
||||
stroke * self.WEIGHTS["stroke_order"]
|
||||
+ quality * self.WEIGHTS["quality"]
|
||||
+ speed * self.WEIGHTS["speed"]
|
||||
+ structure * self.WEIGHTS["structure"]
|
||||
)
|
||||
|
||||
def _determine_level(self, score: float) -> str:
|
||||
"""根据综合评分确定书写等级"""
|
||||
for level, threshold in self.LEVEL_THRESHOLDS.items():
|
||||
if score >= threshold:
|
||||
return level
|
||||
return "初学"
|
||||
|
||||
def _determine_overall_trend(
    self, snapshots: List[WritingSnapshot]
) -> str:
    """
    Classify the overall trend across a series of snapshots.

    Each snapshot is reduced to one overall score with
    ``self._calc_overall_score``. A least-squares line is then fitted
    to those scores against their position in the list (0, 1, 2, ...),
    and its slope decides the result: above 0.5 is "improving", below
    -0.5 is "declining", otherwise "stable".

    Args:
        snapshots: Snapshots in chronological order.

    Returns:
        "improving", "declining" or "stable"; always "stable" when
        fewer than two snapshots are given.
    """
    if len(snapshots) < 2:
        return "stable"

    # One overall score per snapshot.
    scores = [
        self._calc_overall_score(
            snap.stroke_order_accuracy,
            snap.writing_quality,
            snap.writing_speed,
            snap.char_structure,
        )
        for snap in snapshots
    ]

    # Slope of the simple linear regression of score on list position.
    count = len(scores)
    x_mean = (count - 1) / 2
    y_mean = sum(scores) / count
    covariance = sum(
        (idx - x_mean) * (value - y_mean)
        for idx, value in enumerate(scores)
    )
    spread = sum((idx - x_mean) ** 2 for idx in range(count))

    if spread == 0:
        return "stable"

    slope = covariance / spread
    if slope > 0.5:
        return "improving"
    if slope < -0.5:
        return "declining"
    return "stable"
|
||||
|
||||
def _analyze_char_progress(
    self, char_data: List[Dict[str, Any]]
) -> Tuple[List[CharacterProgress], List[CharacterProgress]]:
    """
    Summarise per-character progress from individual practice records.

    Records are grouped by their ``"character"`` value and ordered by
    ``"practice_at"``. For every character the first, latest and best
    scores are taken, the improvement is ``latest - first`` rounded to
    one decimal, and a mastery level is derived from the latest score:
    >= 90 "master", >= 75 "advanced", >= 60 "intermediate", otherwise
    "beginner".

    Args:
        char_data: Practice records with ``"character"``, ``"score"``
            and ``"practice_at"`` keys; missing keys default to ``""``,
            ``0.0`` and ``""`` respectively.

    Returns:
        A pair ``(most_improved, needs_practice)``:

        * ``most_improved`` - every character, largest improvement first;
        * ``needs_practice`` - characters whose latest score is below 70
          after more than three practices, lowest latest score first.
    """
    # Group (score, timestamp) pairs by character.
    history: Dict[str, List[Tuple[float, str]]] = {}
    for row in char_data:
        glyph = row.get("character", "")
        attempt = (row.get("score", 0.0), row.get("practice_at", ""))
        history.setdefault(glyph, []).append(attempt)

    # Minimum latest score for each mastery level, highest first.
    mastery_floors = (
        (90, "master"),
        (75, "advanced"),
        (60, "intermediate"),
    )

    all_progress: List[CharacterProgress] = []
    for glyph, attempts in history.items():
        # Chronological order, so the ends are the first and latest tries.
        timeline = sorted(attempts, key=lambda attempt: attempt[1])
        scores = [score for score, _ in timeline]

        first, latest = scores[0], scores[-1]
        best = max(scores)
        gain = latest - first

        mastery = next(
            (label for floor, label in mastery_floors if latest >= floor),
            "beginner",
        )

        all_progress.append(CharacterProgress(
            character=glyph,
            first_score=first,
            latest_score=latest,
            best_score=best,
            practice_count=len(timeline),
            improvement=round(gain, 1),
            mastery_level=mastery,
        ))

    # Largest improvement first.
    by_gain = sorted(
        all_progress, key=lambda item: item.improvement, reverse=True
    )

    # Still weak after repeated practice: latest score < 70, > 3 tries.
    struggling = [
        item for item in all_progress
        if item.latest_score < 70 and item.practice_count > 3
    ]
    struggling.sort(key=lambda item: item.latest_score)

    return by_gain, struggling
|
||||
Reference in New Issue
Block a user