IK 分词器扩展
1. 扩展IK原生词典类(关键改造点)
public class HotDict extends Dictionary {private static final CopyOnWriteArrayList<String> HOT_WORDS = new CopyOnWriteArrayList<>();// 动态加载新词到内存public static void reload(String filePath) {try {List<String> newWords = Files.readAllLines(Paths.get(filePath));HOT_WORDS.clear();HOT_WORDS.addAll(newWords);logger.info("热词库已更新,数量:{}", HOT_WORDS.size());} catch (IOException e) {logger.error("热词库加载失败", e);}}// 重写IK词库加载方法@Overridepublic void loadMainDict() {super.loadMainDict(); // 加载主词典HOT_WORDS.forEach(super::addWord); // 加载热词} }
IK 分词器集成 Spring
@Configuration
public class IKConfig {@Beanpublic Analyzer ikAnalyzer() {return new Analyzer() {@Overrideprotected TokenStreamComponents createComponents(String fieldName) {// 使用改造后的词典HotDict.loadMainDict(); return new TokenStreamComponents(new IKTokenizer());}};}
}
2. 调用分词API
/**
 * REST endpoint for trying out the analyzer: splits a piece of text into terms.
 */
@RestController
public class TestController {

    @Autowired
    Analyzer analyzer;

    /**
     * Tokenizes {@code text} with the injected analyzer.
     *
     * @param text text to split
     * @return the terms in the order the analyzer produced them
     * @throws IOException if the token stream fails while reading
     */
    @GetMapping("/split")
    public List<String> split(@RequestParam String text) throws IOException {
        List<String> result = new ArrayList<>();
        // Follow the full consumer workflow: reset -> incrementToken* -> end -> close.
        // Without close(), reusing the analyzer's components for the next request is a
        // contract violation.
        try (TokenStream stream = analyzer.tokenStream("", text)) {
            // The attribute instance is reused for every token, so look it up once.
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                result.add(term.toString());
            }
            stream.end();
        }
        return result;
    }
}
3. **验证初始效果**
// 避免加载过程中出现空指针
private static volatile List<String> activeWords = new ArrayList<>();private static List<String> standbyWords = new ArrayList<>();public static void reload() {standbyWords.clear();standbyWords.addAll(loadNewWords());List<String> temp = activeWords;activeWords = standbyWords;standbyWords = temp; // 原子切换
3. HTTP-API触发更新
/**
 * HTTP trigger for a dictionary reload.
 *
 * @param token caller-supplied token, checked with {@code validToken}
 * @return {@code "success"} after the reload was triggered, {@code "forbidden"} when the token
 *         is rejected
 */
@PostMapping("/reload-dict")
public String reloadDict(@RequestParam String token) {
    if (validToken(token)) {
        HotDict.reload();
        return "success";
    }
    return "forbidden";
}
4. **性能监控**
// 记录加载耗时Long start = System.currentTimeMillis();HotDict.reload();Metrics.timer("dict_reload").record(System.currentTimeMillis() - start)