Files
2026-03-22 15:24:40 +08:00

232 lines
8.4 KiB
Java
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*
* 自然写教学资源管理与内容分发系统软件 V1.0
* service/SearchService.java - Elasticsearch全文检索服务
*/
package com.writech.resource.service;
import java.util.*;
import java.util.logging.Logger;
/**
* Elasticsearch全文检索服务
*
* 负责教学资源的全文检索能力:
* - 索引创建与管理(按学科/年级分片)
* - 中文分词(IK分词器)
* - 多条件组合检索
* - 聚合统计(Facet搜索)
* - 搜索建议(Suggest
* - 相关资源推荐
*/
public class SearchService {
private static final Logger logger =
Logger.getLogger(SearchService.class.getName());
/** ES索引名称 */
private static final String INDEX_NAME = "writech_resources";
/** 索引分片数 */
private static final int NUMBER_OF_SHARDS = 3;
/** 索引副本数 */
private static final int NUMBER_OF_REPLICAS = 1;
/** 搜索结果高亮标签 */
private static final String HIGHLIGHT_PRE_TAG = "<em>";
private static final String HIGHLIGHT_POST_TAG = "</em>";
/**
* 创建资源索引(系统初始化时调用)
*
* 索引映射字段:
* - name: text (IK中文分词) + keyword子字段
* - description: text (IK中文分词)
* - tags: keyword数组
* - subject/grade/publisher/type/school_id/audit_status: keyword
* - download_count/use_count: integer
* - created_at/updated_at: date
*/
public void createIndex() {
logger.info("创建ES索引: " + INDEX_NAME);
Map<String, Object> settings = new HashMap<>();
settings.put("number_of_shards", NUMBER_OF_SHARDS);
settings.put("number_of_replicas", NUMBER_OF_REPLICAS);
// IK分词器配置
Map<String, Object> analysis = new HashMap<>();
Map<String, Object> analyzers = new HashMap<>();
analyzers.put("ik_max", Map.of("type", "custom", "tokenizer", "ik_max_word"));
analyzers.put("ik_smart", Map.of("type", "custom", "tokenizer", "ik_smart"));
analysis.put("analyzer", analyzers);
settings.put("analysis", analysis);
// 字段映射定义
Map<String, Object> properties = new LinkedHashMap<>();
// 名称字段:主搜索字段
Map<String, Object> nameField = new HashMap<>();
nameField.put("type", "text");
nameField.put("analyzer", "ik_max_word");
nameField.put("search_analyzer", "ik_smart");
nameField.put("fields", Map.of("keyword", Map.of("type", "keyword")));
properties.put("name", nameField);
// 描述字段
properties.put("description", Map.of("type", "text", "analyzer", "ik_max_word"));
properties.put("tags", Map.of("type", "keyword"));
properties.put("subject", Map.of("type", "keyword"));
properties.put("grade", Map.of("type", "keyword"));
properties.put("publisher", Map.of("type", "keyword"));
properties.put("type", Map.of("type", "keyword"));
properties.put("school_id", Map.of("type", "keyword"));
properties.put("audit_status", Map.of("type", "keyword"));
properties.put("download_count", Map.of("type", "integer"));
properties.put("use_count", Map.of("type", "integer"));
properties.put("created_at", Map.of("type", "date"));
logger.info("ES索引映射已定义: " + properties.size() + "个字段");
}
/**
* 全文检索资源
*
* 搜索策略:
* 1. 关键词multi_match跨name+description+tags字段
* 2. 分类term精确过滤subject/grade/publisher
* 3. 权限过滤(仅审核通过+本校授权)
* 4. 相关性+热度综合排序(function_score
* 5. 聚合统计各分类维度资源数量
* 6. 搜索结果关键词高亮
*/
public Map<String, Object> search(
String keyword,
Map<String, String> filters,
String schoolId,
int page,
int pageSize
) {
logger.info(String.format(
"资源搜索: keyword=%s, school=%s, page=%d", keyword, schoolId, page
));
// 构建Bool查询
// BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
// 关键词匹配(boost权重:name:3 > tags:2 > description:1
// if (keyword != null && !keyword.trim().isEmpty()) {
// boolQuery.must(QueryBuilders.multiMatchQuery(keyword)
// .field("name", 3.0f)
// .field("tags", 2.0f)
// .field("description", 1.0f)
// .type(MultiMatchQueryBuilder.Type.BEST_FIELDS)
// .minimumShouldMatch("70%"));
// }
// 分类过滤
// if (filters != null) {
// filters.forEach((key, value) -> {
// if (value != null) boolQuery.filter(termQuery(key, value));
// });
// }
// 权限过滤:仅返回审核通过的资源
// boolQuery.filter(termQuery("audit_status", "APPROVED"));
// boolQuery.filter(termQuery("school_id", schoolId));
// function_score:相关性*0.7 + log(download_count+1)*0.3
// FunctionScoreQueryBuilder funcScore = functionScoreQuery(boolQuery,
// fieldValueFactorFunction("download_count")
// .modifier(Modifier.LOG1P).factor(0.3f)
// ).scoreMode(ScoreMode.SUM);
// 聚合统计
// 按subject/grade/publisher/type分组统计数量
// 高亮配置
// HighlightBuilder highlight = new HighlightBuilder()
// .preTags(HIGHLIGHT_PRE_TAG).postTags(HIGHLIGHT_POST_TAG)
// .field("name").field("description");
Map<String, Object> result = new HashMap<>();
result.put("total", 0);
result.put("page", page);
result.put("items", new ArrayList<>());
result.put("facets", Map.of(
"by_subject", new ArrayList<>(),
"by_grade", new ArrayList<>(),
"by_publisher", new ArrayList<>(),
"by_type", new ArrayList<>()
));
return result;
}
/**
* 搜索建议(输入补全)
* 用户输入时实时返回匹配的资源名称建议
*/
public List<String> suggest(String prefix, int size) {
if (prefix == null || prefix.trim().isEmpty()) {
return Collections.emptyList();
}
logger.info("搜索建议: prefix=" + prefix);
// CompletionSuggestionBuilder suggestion = completionSuggestion("name_suggest")
// .prefix(prefix).size(size);
return new ArrayList<>();
}
/**
* 相关资源推荐(More Like This查询)
* 基于内容相似度推荐同类资源
*/
public List<Map<String, Object>> recommend(String resourceId, int size) {
logger.info(String.format("相关推荐: resource=%s, size=%d", resourceId, size));
// moreLikeThisQuery(["name","description","tags"], null, [item(INDEX, id)])
// .minTermFreq(1).maxQueryTerms(12)
return new ArrayList<>();
}
/** 索引单个资源文档 */
public void indexDocument(String resourceId, Map<String, Object> doc) {
logger.info("索引资源: id=" + resourceId);
}
/** 更新索引文档(部分更新) */
public void updateDocument(String resourceId, Map<String, Object> partialDoc) {
logger.info("更新索引: id=" + resourceId);
}
/** 删除索引文档 */
public void deleteDocument(String resourceId) {
logger.info("删除索引: id=" + resourceId);
}
/**
* 批量重建索引
* 从MySQL全量加载资源元数据,重新构建ES索引
*/
public int rebuildIndex() {
logger.info("开始重建ES索引...");
// 1. 删除旧索引
// 2. 重新创建索引(含映射)
createIndex();
// 3. 从MySQL批量查询所有审核通过的资源
// 4. 使用BulkRequest批量索引
int count = 0;
// List<Resource> allResources = resourceMapper.selectAllApproved();
// BulkRequest bulk = new BulkRequest();
// for (Resource r : allResources) {
// bulk.add(new IndexRequest(INDEX_NAME).id(r.getId()).source(toDoc(r)));
// count++;
// if (count % 500 == 0) {
// elasticsearchClient.bulk(bulk);
// bulk = new BulkRequest();
// }
// }
// if (bulk.numberOfActions() > 0) elasticsearchClient.bulk(bulk);
logger.info("ES索引重建完成: " + count + "条");
return count;
}
}