software copyright
This commit is contained in:
@@ -0,0 +1,365 @@
|
||||
# 自然写教学数据分析与学情诊断系统软件 V1.0
|
||||
# analytics/knowledge_graph.py - Neo4j知识图谱查询与推理引擎
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
logger = logging.getLogger("writech.analytics.knowledge_graph")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 知识图谱数据模型
|
||||
# ============================================================
|
||||
|
||||
@dataclass
class KnowledgeNode:
    """A knowledge point, i.e. one vertex of the knowledge graph.

    The first four fields are required and positional; the rest are
    optional. Field order is part of the constructor contract and must
    not be rearranged.
    """

    node_id: str            # unique identifier of the knowledge point
    name: str               # display name
    subject: str            # subject the knowledge point belongs to
    grade: str              # grade the knowledge point is taught in
    chapter: str = ""       # chapter label; empty when not assigned
    section: str = ""       # section label; empty when not assigned
    difficulty: float = 0.5  # difficulty coefficient, 0-1
    importance: float = 0.5  # importance, 0-1
    description: str = ""   # free-text description
|
||||
|
||||
|
||||
@dataclass
class KnowledgeEdge:
    """A directed relationship between two knowledge points."""

    source_id: str      # id of the knowledge point the edge starts at
    target_id: str      # id of the knowledge point the edge points to
    # Kind of relationship: prerequisite / includes / related.
    # NOTE(review): the exact spelling/case stored here depends on what the
    # graph query returns for the relationship type - confirm with the data.
    relation_type: str
    weight: float = 1.0  # edge weight; 1.0 when none is given
|
||||
|
||||
|
||||
@dataclass
class StudentMastery:
    """One student's mastery record for one knowledge point."""

    student_id: str
    knowledge_id: str
    mastery_level: float = 0.0  # mastery, 0-1
    practice_count: int = 0     # number of practice attempts recorded
    correct_count: int = 0      # number of correct attempts recorded
    error_count: int = 0        # number of incorrect attempts recorded
    # Time of the most recent practice, stored as text; empty when there is
    # none. NOTE(review): the timestamp format is not fixed here - confirm.
    last_practice: str = ""
|
||||
|
||||
|
||||
@dataclass
class ErrorAttribution:
    """Result of attributing one wrongly answered question.

    The two id lists default to fresh empty lists, so an attribution with
    nothing to report can be built from the question id alone. Positional
    construction with all fields keeps working unchanged.
    """

    question_id: str
    # Knowledge points directly linked to the question.
    error_knowledge_ids: List[str] = field(default_factory=list)
    # Root-cause knowledge points (prerequisites that are not mastered).
    root_cause_ids: List[str] = field(default_factory=list)
    # Human-readable study advice; empty when none was generated.
    suggestion: str = ""
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 知识图谱引擎
|
||||
# ============================================================
|
||||
|
||||
class KnowledgeGraphEngine:
    """Query and reasoning layer over a Neo4j knowledge graph.

    Responsibilities:
      1. Querying and traversing the knowledge-point graph.
      2. Error attribution (tracing back through prerequisite knowledge
         points).
      3. Learning-path recommendation.
      4. Aggregating mastery per chapter.

    NOTE(review): every Neo4j call in this class is still commented out, so
    the graph queries currently return empty results and the methods built
    on top of them operate on empty data.
    """

    # Mastery strictly below this level counts as "not mastered".
    MASTERY_THRESHOLD = 0.6
    # Average mastery strictly below this level yields the
    # "start from the basics" suggestion.
    LOW_MASTERY_THRESHOLD = 0.3

    # Cypher: all knowledge points of a subject, optionally limited to a grade.
    _NODE_QUERY = """
        MATCH (k:KnowledgePoint {subject: $subject})
        WHERE ($grade IS NULL OR k.grade = $grade)
        RETURN k.id AS id, k.name AS name, k.subject AS subject,
               k.grade AS grade, k.chapter AS chapter, k.section AS section,
               k.difficulty AS difficulty, k.importance AS importance,
               k.description AS description
        ORDER BY k.chapter, k.section
    """

    # Cypher: all relationships leaving the knowledge points of a subject.
    # NOTE(review): only the source node is filtered, so an edge may point at
    # a node that the node query above does not return - confirm intent.
    _EDGE_QUERY = """
        MATCH (a:KnowledgePoint {subject: $subject})-[r]->(b:KnowledgePoint)
        WHERE ($grade IS NULL OR a.grade = $grade)
        RETURN a.id AS source, b.id AS target, type(r) AS relation,
               r.weight AS weight
    """

    def __init__(self, uri: str, user: str, password: str):
        """Store the Neo4j connection settings.

        Args:
            uri: Connection URI of the Neo4j instance.
            user: User name used for authentication.
            password: Password used for authentication.
        """
        self.uri = uri
        self.user = user
        self.password = password
        # TODO: create the driver once the Neo4j dependency is wired in:
        # self._driver = GraphDatabase.driver(uri, auth=(user, password))
        logger.info("知识图谱引擎初始化: %s", uri)

    @staticmethod
    def _build_prerequisite_query(depth: int) -> str:
        """Return the Cypher text that walks PREREQUISITE edges up to *depth* hops.

        The upper bound of a variable-length pattern has to be an integer
        literal in the query text, it cannot be passed as a ``$parameter``.
        The depth is therefore rendered into the query, after being coerced
        through ``int`` so that nothing but a number can reach the text.

        Each prerequisite is returned once, with the length of its shortest
        path to the target, even when several paths lead to it.

        Args:
            depth: Maximum number of hops to follow (at least 1).

        Returns:
            The Cypher query; it expects a single ``$kid`` parameter.

        Raises:
            ValueError: If ``depth`` is smaller than 1 or not a number.
            TypeError: If ``depth`` cannot be converted by ``int``.
        """
        hops = int(depth)
        if hops < 1:
            raise ValueError(f"depth must be >= 1, got {depth!r}")
        return (
            "MATCH path = (target:KnowledgePoint {id: $kid})"
            f"<-[:PREREQUISITE*1..{hops}]-(prereq:KnowledgePoint)\n"
            "WITH prereq, min(length(path)) AS distance\n"
            "RETURN prereq.id AS id, prereq.name AS name,\n"
            "       prereq.subject AS subject, prereq.grade AS grade,\n"
            "       prereq.chapter AS chapter, prereq.difficulty AS difficulty,\n"
            "       distance\n"
            "ORDER BY distance ASC"
        )

    async def query_subject_graph(
        self, subject: str, grade: Optional[str] = None
    ) -> Tuple[List["KnowledgeNode"], List["KnowledgeEdge"]]:
        """Fetch the complete knowledge-graph structure of one subject.

        Args:
            subject: Subject name.
            grade: Optional grade filter; ``None`` means all grades.

        Returns:
            ``(nodes, edges)``. Both lists are empty for now because the
            database calls below are not enabled yet.
        """
        logger.info("查询知识图谱: subject=%s, grade=%s", subject, grade)

        nodes: List[KnowledgeNode] = []
        edges: List[KnowledgeEdge] = []

        # TODO: enable once the driver is available.
        # async with self._driver.async_session() as session:
        #     # fetch nodes
        #     result = await session.run(
        #         self._NODE_QUERY, subject=subject, grade=grade
        #     )
        #     async for record in result:
        #         nodes.append(KnowledgeNode(
        #             node_id=record["id"],
        #             name=record["name"],
        #             ...
        #         ))
        #
        #     # fetch edges
        #     result = await session.run(
        #         self._EDGE_QUERY, subject=subject, grade=grade
        #     )
        #     async for record in result:
        #         edges.append(KnowledgeEdge(
        #             source_id=record["source"],
        #             target_id=record["target"],
        #             relation_type=record["relation"],
        #             weight=record["weight"] or 1.0,
        #         ))

        logger.info("图谱查询完成: %d节点, %d边", len(nodes), len(edges))
        return nodes, edges

    async def query_prerequisites(
        self, knowledge_id: str, max_depth: int = 3
    ) -> List["KnowledgeNode"]:
        """Fetch the prerequisite chain of a knowledge point.

        Used for error attribution: when a knowledge point is not mastered,
        its prerequisites are inspected to find the underlying cause.

        Args:
            knowledge_id: Id of the target knowledge point.
            max_depth: Maximum number of PREREQUISITE hops to follow.

        Returns:
            Prerequisite knowledge points; the intended query orders them by
            distance from the target, nearest first. Empty for now because
            the database call below is not enabled yet.
        """
        prerequisites: List[KnowledgeNode] = []

        # TODO: enable once the driver is available.
        # query = self._build_prerequisite_query(max_depth)
        # async with self._driver.async_session() as session:
        #     result = await session.run(query, kid=knowledge_id)
        #     async for record in result:
        #         prerequisites.append(KnowledgeNode(
        #             node_id=record["id"],
        #             name=record["name"],
        #             ...
        #         ))

        logger.debug(
            "知识点 %s 的前驱链: %d个",
            knowledge_id,
            len(prerequisites),
        )
        return prerequisites

    async def _find_root_causes(
        self, knowledge_ids: List[str], mastery_map: Dict[str, float]
    ) -> List[str]:
        """Return ids of unmastered prerequisites of unmastered knowledge points.

        Knowledge points missing from ``mastery_map`` count as mastery 0.0.
        Ids are returned once each, in first-seen order.
        """
        threshold = self.MASTERY_THRESHOLD
        # A dict keeps insertion order and gives O(1) de-duplication.
        found: Dict[str, None] = {}
        for kp_id in knowledge_ids:
            if mastery_map.get(kp_id, 0.0) < threshold:
                # This knowledge point is not mastered: inspect what it
                # depends on.
                for prereq in await self.query_prerequisites(kp_id):
                    if mastery_map.get(prereq.node_id, 0.0) < threshold:
                        found.setdefault(prereq.node_id, None)
        return list(found)

    async def attribute_errors(
        self,
        student_id: str,
        error_question_ids: List[str],
        mastery_map: Dict[str, float],
    ) -> List["ErrorAttribution"]:
        """Attribute each wrongly answered question to knowledge points.

        For every question:
          1. Look up the knowledge points linked to the question.
          2. Look up the prerequisites of those knowledge points.
          3. Check the mastery of the prerequisites.
          4. A prerequisite that is also not mastered is a root cause.

        NOTE(review): step 1 is not implemented yet (the lookup is commented
        out), so every result currently carries empty id lists and the
        fallback suggestion.

        Args:
            student_id: Student id (only used for logging here).
            error_question_ids: Ids of the wrongly answered questions.
            mastery_map: ``{knowledge_id: mastery_level}``.

        Returns:
            One ``ErrorAttribution`` per question, in input order.
        """
        logger.info(
            "错题归因: student=%s, 错题数=%d",
            student_id,
            len(error_question_ids),
        )

        attributions: List[ErrorAttribution] = []
        for question_id in error_question_ids:
            # TODO: question_kps = await self._query_question_knowledge(question_id)
            question_kps: List[str] = []

            root_causes = await self._find_root_causes(question_kps, mastery_map)
            suggestion = self._generate_suggestion(
                question_kps, root_causes, mastery_map
            )
            attributions.append(ErrorAttribution(
                question_id=question_id,
                error_knowledge_ids=question_kps,
                root_cause_ids=root_causes,
                suggestion=suggestion,
            ))

        return attributions

    def _generate_suggestion(
        self,
        knowledge_ids: List[str],
        root_cause_ids: List[str],
        mastery_map: Dict[str, float],
    ) -> str:
        """Turn an attribution result into a study suggestion.

        Root causes take priority. Otherwise the advice depends on the
        average mastery of ``knowledge_ids`` (missing ids count as 0).
        With neither root causes nor knowledge ids a generic fallback is
        returned.
        """
        if root_cause_ids:
            return (
                f"建议先复习前驱知识点(共{len(root_cause_ids)}个),"
                f"夯实基础后再针对性练习当前知识点"
            )

        if not knowledge_ids:
            return "暂无具体建议"

        # knowledge_ids is non-empty here, so the division is safe.
        total = sum(mastery_map.get(k, 0) for k in knowledge_ids)
        avg_mastery = total / len(knowledge_ids)
        if avg_mastery < self.LOW_MASTERY_THRESHOLD:
            return "该知识点掌握度较低,建议从基础概念开始系统学习"
        if avg_mastery < self.MASTERY_THRESHOLD:
            return "该知识点已有一定基础,建议加强专项练习巩固提升"
        return "知识点掌握较好,本次错误可能是粗心或审题不清"

    async def recommend_learning_path(
        self,
        student_id: str,
        target_knowledge_id: str,
        mastery_map: Dict[str, float],
    ) -> List["KnowledgeNode"]:
        """Recommend the knowledge points to study before a target.

        Principles:
          1. Fill in unmastered prerequisites first.
          2. Order from low to high difficulty.
          3. Skip knowledge points that are already mastered.

        NOTE(review): the result is ordered by difficulty only; dependency
        order between the prerequisites is not enforced. The target itself
        is not appended yet (see the TODO below).

        Args:
            student_id: Student id (only used for logging here).
            target_knowledge_id: Knowledge point the student wants to reach.
            mastery_map: ``{knowledge_id: mastery_level}``; missing ids
                count as 0.0.

        Returns:
            Unmastered prerequisites (up to 5 hops away), easiest first.
        """
        all_prereqs = await self.query_prerequisites(
            target_knowledge_id, max_depth=5
        )

        # Keep only what the student has not mastered, easiest first.
        unmastered = sorted(
            (
                node for node in all_prereqs
                if mastery_map.get(node.node_id, 0.0) < self.MASTERY_THRESHOLD
            ),
            key=lambda n: n.difficulty,
        )

        # TODO: append the target knowledge point itself:
        # target_node = await self._get_knowledge_node(target_knowledge_id)
        # if target_node:
        #     unmastered.append(target_node)

        logger.info(
            "学习路径推荐: student=%s, target=%s, 路径长度=%d",
            student_id,
            target_knowledge_id,
            len(unmastered),
        )
        return unmastered

    async def aggregate_chapter_mastery(
        self,
        student_id: str,
        subject: str,
        mastery_map: Dict[str, float],
    ) -> List[Dict[str, Any]]:
        """Aggregate knowledge-point mastery per chapter.

        Groups the subject's knowledge points by chapter and computes the
        mean mastery of each group (intended for a per-chapter radar chart).

        Args:
            student_id: Student id (not used by the computation).
            subject: Subject whose graph is aggregated.
            mastery_map: ``{knowledge_id: mastery_level}``; missing ids
                count as 0.0.

        Returns:
            One dict per chapter, sorted by chapter label, with the keys
            ``chapter``, ``avg_mastery`` (rounded to 3 decimals),
            ``knowledge_count`` and ``mastered_count``.
        """
        nodes, _ = await self.query_subject_graph(subject)

        # Group mastery values by chapter; nodes without a chapter share
        # one bucket.
        chapter_map: Dict[str, List[float]] = {}
        for node in nodes:
            chapter = node.chapter or "其他"
            chapter_map.setdefault(chapter, []).append(
                mastery_map.get(node.node_id, 0.0)
            )

        # Every bucket holds at least one value, so the mean is well defined.
        result: List[Dict[str, Any]] = []
        for chapter in sorted(chapter_map):
            masteries = chapter_map[chapter]
            result.append({
                "chapter": chapter,
                "avg_mastery": round(sum(masteries) / len(masteries), 3),
                "knowledge_count": len(masteries),
                "mastered_count": sum(
                    1 for m in masteries if m >= self.MASTERY_THRESHOLD
                ),
            })
        return result

    async def close(self) -> None:
        """Close the Neo4j connection (currently only logs)."""
        # TODO: await self._driver.close()
        logger.info("知识图谱引擎已关闭")
|
||||
Reference in New Issue
Block a user