# 自然写 teaching data analysis and learning diagnostics system software, V1.0
# analytics/knowledge_graph.py - Neo4j knowledge graph query and reasoning engine
|
|
|
|
import logging
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
from dataclasses import dataclass, field
|
|
|
|
# Module-level logger shared by everything in this file; the name is a fixed
# dotted string rather than __name__.
logger = logging.getLogger("writech.analytics.knowledge_graph")
|
|
|
|
|
|
# ============================================================
# Knowledge graph data model
# ============================================================
|
|
|
|
@dataclass
class KnowledgeNode:
    """A knowledge-point node in the knowledge graph.

    The first four fields are required; the rest fall back to the defaults
    declared below.
    """

    # --- required identity fields ---
    node_id: str
    name: str
    subject: str
    grade: str

    # --- optional curriculum placement ---
    chapter: str = ""
    section: str = ""

    # --- optional scoring attributes ---
    difficulty: float = 0.5  # difficulty coefficient, 0-1
    importance: float = 0.5  # importance, 0-1

    description: str = ""
|
|
|
|
|
|
@dataclass
class KnowledgeEdge:
    """A directed relation between two knowledge points."""

    source_id: str
    target_id: str
    # Kind of relation: prerequisite / includes / related.
    relation_type: str
    # Edge weight; 1.0 unless the caller supplies another value.
    weight: float = 1.0
|
|
|
|
|
|
@dataclass
class StudentMastery:
    """One student's mastery record for a single knowledge point."""

    student_id: str
    knowledge_id: str

    # Mastery level, 0-1.
    mastery_level: float = 0.0

    # Practice counters; all start at zero.
    practice_count: int = 0
    correct_count: int = 0
    error_count: int = 0

    # Last practice marker, stored as a string; empty by default.
    last_practice: str = ""
|
|
|
|
|
|
@dataclass
class ErrorAttribution:
    """Attribution result for one wrongly answered question."""

    question_id: str
    # Knowledge points directly linked to the question.
    error_knowledge_ids: List[str]
    # Root-cause knowledge points (prerequisites that are not mastered either).
    root_cause_ids: List[str]
    # Study suggestion text; empty unless one is supplied.
    suggestion: str = ""
|
|
|
|
|
|
# ============================================================
# Knowledge graph engine
# ============================================================
|
|
|
|
class KnowledgeGraphEngine:
    """Query and reasoning engine over a Neo4j knowledge graph.

    Responsibilities:
      1. Querying and traversing the knowledge-point graph.
      2. Error attribution (tracing back through prerequisite knowledge points).
      3. Learning-path recommendation.
      4. Aggregating knowledge-point mastery per chapter.

    NOTE: every Neo4j driver call in this class is still commented out, so
    the query methods currently return empty results. The commented blocks
    are kept as scaffolding for the intended implementation.
    """

    # Mastery strictly below this level counts as "not mastered".
    MASTERY_THRESHOLD: float = 0.6
    # Average mastery strictly below this level gets the "start from the
    # basics" wording in generated suggestions.
    LOW_MASTERY_THRESHOLD: float = 0.3

    def __init__(self, uri: str, user: str, password: str):
        """Store the Neo4j connection settings and log the target URI.

        Args:
            uri: Neo4j connection URI.
            user: Neo4j user name.
            password: Neo4j password.
        """
        self.uri = uri
        self.user = user
        self.password = password
        # self._driver = GraphDatabase.driver(uri, auth=(user, password))
        logger.info("知识图谱引擎初始化: %s", uri)

    async def query_subject_graph(
        self, subject: str, grade: Optional[str] = None
    ) -> Tuple[List[KnowledgeNode], List[KnowledgeEdge]]:
        """Query the knowledge-graph structure of one subject.

        Args:
            subject: Subject name.
            grade: Optional grade filter; ``None`` means all grades.

        Returns:
            A ``(nodes, edges)`` tuple. Both lists are empty while the driver
            calls below remain commented out.
        """
        logger.info("查询知识图谱: subject=%s, grade=%s", subject, grade)

        # Cypher: fetch every knowledge-point node of the subject.
        node_query = """
        MATCH (k:KnowledgePoint {subject: $subject})
        WHERE ($grade IS NULL OR k.grade = $grade)
        RETURN k.id AS id, k.name AS name, k.subject AS subject,
               k.grade AS grade, k.chapter AS chapter, k.section AS section,
               k.difficulty AS difficulty, k.importance AS importance,
               k.description AS description
        ORDER BY k.chapter, k.section
        """

        # Cypher: fetch every relation leaving a node of the subject.
        # NOTE(review): only the source node `a` is filtered by subject/grade,
        # so an edge may point at a node that is absent from the node list —
        # confirm whether that is intended.
        edge_query = """
        MATCH (a:KnowledgePoint {subject: $subject})-[r]->(b:KnowledgePoint)
        WHERE ($grade IS NULL OR a.grade = $grade)
        RETURN a.id AS source, b.id AS target, type(r) AS relation,
               r.weight AS weight
        """

        nodes: List[KnowledgeNode] = []
        edges: List[KnowledgeEdge] = []

        # async with self._driver.async_session() as session:
        #     # fetch nodes
        #     result = await session.run(node_query, subject=subject, grade=grade)
        #     async for record in result:
        #         nodes.append(KnowledgeNode(
        #             node_id=record["id"],
        #             name=record["name"],
        #             ...
        #         ))
        #
        #     # fetch edges
        #     result = await session.run(edge_query, subject=subject, grade=grade)
        #     async for record in result:
        #         edges.append(KnowledgeEdge(
        #             source_id=record["source"],
        #             target_id=record["target"],
        #             relation_type=record["relation"],
        #             weight=record["weight"] or 1.0,
        #         ))

        logger.info(
            "图谱查询完成: %d节点, %d边", len(nodes), len(edges)
        )
        return nodes, edges

    @staticmethod
    def _build_prerequisite_query(max_depth: int) -> str:
        """Build the Cypher text that walks up to ``max_depth`` PREREQUISITE hops.

        The bound of a variable-length relationship pattern has to be an
        integer literal; Cypher does not accept a query parameter there. The
        depth is therefore coerced to ``int`` and inlined, while the
        knowledge-point id stays a real parameter (``$kid``).

        ``min(length(path))`` collapses a node reachable through several
        paths into a single row carrying its shortest distance.

        Raises:
            ValueError: if ``max_depth`` is smaller than 1.
        """
        depth = int(max_depth)
        if depth < 1:
            raise ValueError("max_depth must be >= 1")
        return f"""
        MATCH path = (target:KnowledgePoint {{id: $kid}})
              <-[:PREREQUISITE*1..{depth}]-(prereq:KnowledgePoint)
        RETURN prereq.id AS id, prereq.name AS name,
               prereq.subject AS subject, prereq.grade AS grade,
               prereq.chapter AS chapter, prereq.difficulty AS difficulty,
               min(length(path)) AS distance
        ORDER BY distance ASC
        """

    async def query_prerequisites(
        self, knowledge_id: str, max_depth: int = 3
    ) -> List[KnowledgeNode]:
        """Query the prerequisite chain of a knowledge point.

        Used by error attribution: when a knowledge point is not mastered,
        its prerequisites are inspected to look for the underlying cause.

        Args:
            knowledge_id: ID of the target knowledge point.
            max_depth: Maximum number of PREREQUISITE hops to follow. A value
                below 1 yields an empty list.

        Returns:
            Prerequisite knowledge points, nearest first. Empty while the
            driver call below remains commented out.
        """
        depth = int(max_depth)
        if depth < 1:
            # A pattern with an upper bound below 1 cannot match anything.
            return []

        # Kept ready for the commented-out driver call below.
        query = self._build_prerequisite_query(depth)

        prerequisites: List[KnowledgeNode] = []
        # async with self._driver.async_session() as session:
        #     result = await session.run(query, kid=knowledge_id)
        #     async for record in result:
        #         prerequisites.append(KnowledgeNode(
        #             node_id=record["id"],
        #             name=record["name"],
        #             ...
        #         ))

        logger.debug(
            "知识点 %s 的前驱链: %d个",
            knowledge_id,
            len(prerequisites),
        )
        return prerequisites

    async def attribute_errors(
        self,
        student_id: str,
        error_question_ids: List[str],
        mastery_map: Dict[str, float],
    ) -> List[ErrorAttribution]:
        """Attribute each wrong answer to knowledge points and root causes.

        For every wrong question:
          1. Look up the knowledge points linked to the question.
          2. For each linked point that is not mastered, fetch its
             prerequisites.
          3. Every prerequisite that is not mastered either is recorded as a
             root cause (each id once, in discovery order).

        Args:
            student_id: Student ID (used for logging only).
            error_question_ids: IDs of the wrongly answered questions.
            mastery_map: ``{knowledge_id: mastery_level}``; ids missing from
                the map are treated as mastery 0.0.

        Returns:
            One ``ErrorAttribution`` per question id, in input order.
        """
        logger.info(
            "错题归因: student=%s, 错题数=%d",
            student_id,
            len(error_question_ids),
        )

        threshold = self.MASTERY_THRESHOLD
        attributions: List[ErrorAttribution] = []
        # The same knowledge point often shows up in several wrong questions;
        # remember its prerequisites so each one is queried once per call.
        prereq_cache: Dict[str, List[KnowledgeNode]] = {}

        for question_id in error_question_ids:
            # The question -> knowledge-point lookup is not wired in yet:
            # question_kps = await self._query_question_knowledge(question_id)
            question_kps: List[str] = []

            root_causes: List[str] = []
            seen_roots = set()

            for kp_id in question_kps:
                if mastery_map.get(kp_id, 0.0) < threshold:
                    # Not mastered: trace back through its prerequisites.
                    if kp_id not in prereq_cache:
                        prereq_cache[kp_id] = await self.query_prerequisites(kp_id)

                    for prereq in prereq_cache[kp_id]:
                        prereq_id = prereq.node_id
                        if prereq_id in seen_roots:
                            continue
                        if mastery_map.get(prereq_id, 0.0) < threshold:
                            # The prerequisite is not mastered either.
                            seen_roots.add(prereq_id)
                            root_causes.append(prereq_id)

            suggestion = self._generate_suggestion(
                question_kps, root_causes, mastery_map
            )

            attributions.append(ErrorAttribution(
                question_id=question_id,
                error_knowledge_ids=question_kps,
                root_cause_ids=root_causes,
                suggestion=suggestion,
            ))

        return attributions

    def _generate_suggestion(
        self,
        knowledge_ids: List[str],
        root_cause_ids: List[str],
        mastery_map: Dict[str, float],
    ) -> str:
        """Turn an attribution result into a study suggestion.

        Root causes take priority. Otherwise the wording depends on the
        average mastery of ``knowledge_ids`` (missing ids count as 0). With
        neither root causes nor knowledge points a generic fallback is
        returned.
        """
        if root_cause_ids:
            return (
                f"建议先复习前驱知识点(共{len(root_cause_ids)}个),"
                "夯实基础后再针对性练习当前知识点"
            )

        if not knowledge_ids:
            return "暂无具体建议"

        avg_mastery = sum(
            mastery_map.get(k, 0.0) for k in knowledge_ids
        ) / len(knowledge_ids)

        if avg_mastery < self.LOW_MASTERY_THRESHOLD:
            return "该知识点掌握度较低,建议从基础概念开始系统学习"
        if avg_mastery < self.MASTERY_THRESHOLD:
            return "该知识点已有一定基础,建议加强专项练习巩固提升"
        return "知识点掌握较好,本次错误可能是粗心或审题不清"

    async def recommend_learning_path(
        self,
        student_id: str,
        target_knowledge_id: str,
        mastery_map: Dict[str, float],
    ) -> List[KnowledgeNode]:
        """Recommend the prerequisites a student should study for a target.

        Steps:
          1. Fetch the target's prerequisites (up to 5 hops).
          2. Drop duplicates and prerequisites that are already mastered.
          3. Order the remainder by ascending difficulty.

        NOTE: the ordering is by difficulty only; it is not a topological
        sort, so it does not by itself guarantee that a prerequisite comes
        before the points depending on it. Appending the target node itself
        is not implemented yet (see the commented block below).

        Args:
            student_id: Student ID (used for logging only).
            target_knowledge_id: ID of the knowledge point to reach.
            mastery_map: ``{knowledge_id: mastery_level}``; ids missing from
                the map are treated as mastery 0.0.

        Returns:
            Unmastered prerequisite nodes, easiest first.
        """
        all_prereqs = await self.query_prerequisites(
            target_knowledge_id, max_depth=5
        )

        # Keep each unmastered prerequisite once, even if it was reached
        # through more than one path.
        seen_ids = set()
        unmastered: List[KnowledgeNode] = []
        for node in all_prereqs:
            if node.node_id in seen_ids:
                continue
            seen_ids.add(node.node_id)
            if mastery_map.get(node.node_id, 0.0) < self.MASTERY_THRESHOLD:
                unmastered.append(node)

        # Easiest first.
        unmastered.sort(key=lambda n: n.difficulty)

        # Append the target knowledge point itself:
        # target_node = await self._get_knowledge_node(target_knowledge_id)
        # if target_node:
        #     unmastered.append(target_node)

        logger.info(
            "学习路径推荐: student=%s, target=%s, 路径长度=%d",
            student_id,
            target_knowledge_id,
            len(unmastered),
        )

        return unmastered

    async def aggregate_chapter_mastery(
        self,
        student_id: str,
        subject: str,
        mastery_map: Dict[str, float],
    ) -> List[Dict[str, Any]]:
        """Aggregate knowledge-point mastery per chapter.

        Groups the subject's knowledge points by chapter (nodes without a
        chapter go under "其他") and computes per-chapter statistics.

        Args:
            student_id: Student ID (not used by the computation itself).
            subject: Subject whose graph is aggregated.
            mastery_map: ``{knowledge_id: mastery_level}``; ids missing from
                the map are treated as mastery 0.0.

        Returns:
            One dict per chapter, sorted by chapter name, with the keys
            ``chapter``, ``avg_mastery`` (rounded to 3 decimals),
            ``knowledge_count`` and ``mastered_count``.
        """
        nodes, _ = await self.query_subject_graph(subject)

        # Group mastery values by chapter.
        chapter_map: Dict[str, List[float]] = {}
        for node in nodes:
            chapter = node.chapter or "其他"
            chapter_map.setdefault(chapter, []).append(
                mastery_map.get(node.node_id, 0.0)
            )

        # Every list holds at least one value, so the mean is always defined.
        result = [
            {
                "chapter": chapter,
                "avg_mastery": round(sum(masteries) / len(masteries), 3),
                "knowledge_count": len(masteries),
                "mastered_count": sum(
                    1 for m in masteries if m >= self.MASTERY_THRESHOLD
                ),
            }
            for chapter, masteries in chapter_map.items()
        ]

        result.sort(key=lambda x: x["chapter"])
        return result

    async def close(self) -> None:
        """Log that the engine was closed (the driver close is commented out)."""
        # await self._driver.close()
        logger.info("知识图谱引擎已关闭")
|