【Elasticsearch】-实现图片向量相似检索
1、http请求方式
如果elasticsearch服务设置账号密码,则在请求的header中添加 Basic Auth 认证
请求方式:POST
请求地址:/index_name/_search
请求body:json格式
{
  "size": 10, // 返回条数
  "min_score": 0.8, // 设置最低相似分值
  "_source": ["file_name", "length", "_es_doc_type"], // 只返回指定字段
  "query": {
    "script_score": {
      "query": {"match_all": {}},
      "script": {
        // _img_vector 为设置的向量索引字段
        "source": "cosineSimilarity(params.query_vector, '_img_vector') + 0.0",
        "params": {
          "query_vector": [-1,1,-0.07559559,-0.007800484,0.11229578,0.064164124,....]
        }
      }
    }
  }
}
主要参数说明:
- "from": 0, // 起始位置,0表示第一页
- "size": 10, // 每页返回的记录数
- "min_score": 0.5, //最低相似度,最高1
- "_source": ["image_id", "image_name", "image_vector"], // 返回指定字段
返回结果如下:
{"took": 3,"timed_out": false,"_shards": {"total": 1,"successful": 1,"skipped": 0,"failed": 0},"hits": {"total": {"value": 1,"relation": "eq"},"max_score": 0.9014968,"hits": [{"_index": "vedms","_type": "_doc","_id": "04a40e806be82e87f3c3a2f3877225bd.jpg","_score": 0.9014968,"_source": {"file_name": "04a40e806be82e87f3c3a2f3877225bd.jpg","_es_doc_type": "IMAGE","length": 89690}}]}
}
需要确保传入的 query_vector 长度与索引中向量字段的维度一致,前面的章节中已设定为 1024 维。
否则会出现如下错误:
"reason": {
"type": "script_exception",
"reason": "runtime error",
"script_stack": [
"org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$DenseVectorFunction.<init>(ScoreScriptUtils.java:74)",
"org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$CosineSimilarity.<init>(ScoreScriptUtils.java:172)",
"cosineSimilarity(params.query_vector, '_img_vector') + 0.0",
"                                      ^---- HERE"
],
"script": "cosineSimilarity(params.query_vector, '_img_vector') + 0.0",
"lang": "painless",
"position": {
"offset": 38,
"start": 0,
"end": 58
},
"caused_by": {
"type": "illegal_argument_exception",
"reason": "The query vector has a different number of dimensions [1023] than the document vectors [1024]."
}
}
2、Java调用脚本
SearchRequest 不允许在 script 中设置 _source 属性内容,所以干脆将 from、size、min_score、_source 一并从 JSON 中拿出,改由 SearchRequest.Builder 设置,JSON 中只保留 vector 查询部分
_img_vector为前面定义的向量索引字段
public List<Map<String, Object>> search(EsVectorSearchReq req) {float[] vector = getImgFeature(req);if (null == vector || vector.length == 0) {return Collections.emptyList();}String queryJson = String.format(VECTOR_FORMAT, vectorToJson(vector));log.debug("向量检索入参条件={}", queryJson);Reader input = new StringReader(queryJson);// 使用查询 DSL 进行搜索SearchRequest searchRequest = new SearchRequest.Builder().index(req.getIndexLib()).from(req.getFrom()).size(req.getSize()).minScore(req.getScore()).source(SourceConfig.of(src -> src.filter(SourceFilter.of(i -> i.includes(req.getColumns()))))).withJson(input).build();// 执行查询List<Map<String, Object>> result = new ArrayList<>();try {SearchResponse<Map> searchResponse = esClient.search(searchRequest, Map.class);// 输出结果for (Hit<Map> hit : searchResponse.hits().hits()) {result.add(hit.source());}log.info("成功查询{}条", result.size());} catch (IOException e) {e.printStackTrace();}return result;}
/**
 * Serializes a float vector as a JSON array literal, e.g. {@code [1.0,0.5]}.
 *
 * @param vector the components to serialize
 * @return the components separated by commas and wrapped in square brackets;
 *         {@code []} for an empty array
 */
private String vectorToJson(float[] vector) {
    StringBuilder json = new StringBuilder();
    json.append("[");
    // The separator is empty before the first component and a comma afterwards,
    // so no trailing comma is ever written.
    String separator = "";
    for (float component : vector) {
        json.append(separator).append(component);
        separator = ",";
    }
    return json.append("]").toString();
}
/**
 * Query DSL template for the vector similarity search.
 *
 * <p>The single {@code %s} placeholder is filled with the query vector as a JSON array.
 * The script scores documents by cosine similarity against the {@code _img_vector}
 * field, which is the vector field name used by the request examples for this index.
 */
private static final String VECTOR_FORMAT = "{\n" +
        " \"query\": {\n" +
        " \"script_score\": {\n" +
        " \"query\": {\n" +
        " \"match_all\": {}\n" +
        " },\n" +
        " \"script\": {\n" +
        " \"source\": \"cosineSimilarity(params.query_vector, '_img_vector') + 0.0\",\n" +
        " \"params\": {\n" +
        " \"query_vector\": %s\n" +
        " }\n" +
        " }\n" +
        " }\n" +
        " }\n" +
        "}";
传入参数格式如下:
{"query": {"script_score": {"query": {"match_all": {}},"script": {"source": "cosineSimilarity(params.query_vector, '_img_vector') + 0.0","params": {"query_vector": [-0.033....]}}}}
}
返回结果如下:
{
"_shards": {
"failed": 0.0,
"skipped": 0.0,
"successful": 1.0,
"total": 1.0
},
"hits": {
"hits": [
{
"_id": "04a40e806be82e87f3c3a2f3877225bd.jpg",
"_index": "vedms",
"_score": 1.0,
"_source": "{file_name=04a40e806be82e87f3c3a2f3877225bd.jpg}",
"_type": "_doc"
}
],
"max_score": 1.0,
"total": {
"relation": "eq",
"value": 1
}
},
"timed_out": false,
"took": 46
}