Springboot集成elasticsearch 使用IK+拼音分词 - Go语言中文社区

Springboot集成elasticsearch 使用IK+拼音分词


Springboot集成elasticsearch 使用IK+拼音分词

docker安装ES

下载

docker pull docker.elastic.co/elasticsearch/elasticsearch:6.3.2

启动

docker run -d --name="es" -p 9200:9200 -p 9300:9300 -e "cluster.name=elasticsearch" -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:6.3.2

springboot集成

maven依赖
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    <version>2.1.4.RELEASE</version>
 </dependency>
配置文件
spring:
  data:
    elasticsearch:
      cluster-name: elasticsearch
      cluster-nodes: ip:9300
实体类
package com.my.entity;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.data.elasticsearch.annotations.Document;

/**
 * Goods document persisted to Elasticsearch, carrying an id, a name and a
 * short description ({@code des}). Lombok generates the accessors, the
 * no-args constructor and the all-args constructor (id, name, des).
 *
 * @author M.Y
 * @date 2019/5/30
 * @since 1.0.0
 */
@AllArgsConstructor
@NoArgsConstructor
@Data
@Document(indexName = "contents",type = "content")
// indexName: the index name, roughly comparable to a database name. It must be lowercase,
// otherwise org.elasticsearch.indices.InvalidIndexNameException is thrown.
// type: the mapping type, roughly comparable to a table name.
public class GoodsInfo {
    private Long id;
    private String name;
    private String des;

}

DAO
package com.my.dao;

import com.my.entity.GoodsInfo;
import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;
import org.springframework.stereotype.Component;

/**
 * Repository for {@link GoodsInfo} documents identified by a {@code Long} id.
 * It declares no methods of its own; all operations used by callers come from
 * {@link ElasticsearchRepository}.
 *
 * @author M.Y
 * @date 2019/5/30
 * @since 1.0.0
 */
@Component
public interface GoodsRepository extends ElasticsearchRepository<GoodsInfo,Long> {
}

Controller
package com.my.controller;

import com.my.dao.GoodsRepository;
import com.my.entity.GoodsInfo;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;

/**
 * Demo REST controller exposing save / delete / update / getOne endpoints
 * for {@link GoodsInfo} documents, all delegating to {@link GoodsRepository}.
 *
 * @author M.Y
 * @date 2019/5/30
 * @since 1.0.0
 */
@RestController
public class GoodsController {
    @Autowired
    private GoodsRepository goodsRepository;

    /**
     * Creates a goods document whose id and name are derived from the current time.
     * Example: http://localhost:8080/save?des=
     *
     * @param des description stored on the new document
     * @return the literal string "success"
     */
    @GetMapping("save")
    public String save(String des){
        long generatedId = System.currentTimeMillis();
        String generatedName = "商品" + System.currentTimeMillis();
        goodsRepository.save(new GoodsInfo(generatedId, generatedName, des));
        return "success";
    }

    /**
     * Deletes the document with the given id.
     * Example: http://localhost:8080/delete?id=
     *
     * @param id id of the document to remove
     * @return the literal string "success"
     */
    @GetMapping("delete")
    public String delete(long id){
        goodsRepository.deleteById(id);
        return "success";
    }

    /**
     * Saves a document built from the given id, name and description.
     * Example: http://localhost:8080/update?name=修改&description=修改&id=
     * NOTE(review): the handler parameter is named {@code description}, so the query
     * key is presumably {@code description} rather than {@code des} - confirm.
     *
     * @param id          id of the document to overwrite
     * @param name        new name
     * @param description new description
     * @return the literal string "success"
     */
    @GetMapping("update")
    public String update(long id,String name,String description){
        goodsRepository.save(new GoodsInfo(id, name, description));
        return "success";
    }

    /**
     * Looks up a single document by id.
     * Example: http://localhost:8080/getOne?id=
     *
     * @param id id of the document to fetch
     * @return the document, or {@code null} when the repository finds none
     */
    @GetMapping("getOne")
    public GoodsInfo getOne(long id){
        return goodsRepository.findById(id).orElse(null);
    }
}
测试

在这里插入图片描述

使用谷歌浏览器(Chrome)的 elasticsearch-head 插件查看

在这里插入图片描述

安装IK分词插件

进入容器

docker exec -it es bash

进入目录

cd /usr/share/elasticsearch/

下载安装插件(注意版本要与es一致)

./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.3.2/elasticsearch-analysis-ik-6.3.2.zip

退出容器

exit

重启容器

docker restart es

验证结果

ik_max_word:尽可能多的分词

在这里插入图片描述

ik_smart:尽可能少的分词

在这里插入图片描述

安装拼音转换插件

进入容器

docker exec -it es bash

进入目录

cd /usr/share/elasticsearch/

下载安装插件(注意版本要与es一致)

./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-pinyin/releases/download/v6.3.2/elasticsearch-analysis-pinyin-6.3.2.zip

退出容器

exit

重启容器

docker restart es

新建索引

在这里插入图片描述

关闭索引(更新配置前要关闭索引,不然会报错)

在这里插入图片描述

创建拼音分词器

在这里插入图片描述

{
  "analysis" : {
     "analyzer" : {
         "pinyin_analyzer" : {
             "tokenizer" : "my_pinyin"
          }
     },
     "tokenizer" : {
         "my_pinyin" : {
             "type" : "pinyin",
             "keep_separate_first_letter" : false,
             "keep_full_pinyin" : true,
             "keep_original" : true,
             "limit_first_letter_length" : 16,
             "lowercase" : true,
             "remove_duplicated_term" : true
         }
     }
  }
}
验证结果

在这里插入图片描述

Springboot集成以上分词

新建索引,新增分词配置

在这里插入图片描述

{
  "analysis" : {
     "analyzer" : {
         "pinyin_analyzer" : {
             "tokenizer" : "my_pinyin"
          }
     },
     "tokenizer" : {
         "my_pinyin" : {
             "type" : "pinyin",
             "keep_separate_first_letter" : false,
             "keep_full_pinyin" : true,
             "keep_original" : true,
             "limit_first_letter_length" : 16,
             "lowercase" : true,
             "remove_duplicated_term" : true
         }
     }
  }
}

在这里插入图片描述

{
        "properties": {
            "字段名": {
                "type": "keyword",
                "fields": {
                    "pinyin": {
                        "type": "text",
                        "store": false,
                        "term_vector": "with_offsets",
                        "analyzer": "pinyin_analyzer",
                        "boost": 10
                    }
                }
            }
        }
    
}
新实体类
package com.my.entity;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Mapping;

import java.util.Date;

/**
 * Film document mapped to the {@code new_film} index under type {@code new}.
 * Lombok supplies the accessors plus the no-args and all-args
 * (id, name, director, created) constructors.
 */
@AllArgsConstructor
@NoArgsConstructor
@Data
@Document(indexName = "new_film", type = "new")
public class FilmEntity {

    private Long id;
    private String name;
    private String director;
    private Date created ;

    /**
     * Renders id, name and director; {@code created} is not part of the output.
     */
    @Override
    public String toString() {
        return String.format("FilmEntity [id=%s, name=%s, director=%s]", id, name, director);
    }
}

新增测试数据
	// Injected repository; the endpoints below call save(...) and search(...) on it.
	@Autowired
    FilmRepository filmRepository;
    /**
     * Stores a new film document whose id is the current epoch-millisecond timestamp
     * and whose {@code created} field is the current instant.
     *
     * @param des  value stored in the document's director field
     * @param name film name
     * @return the literal string "success"
     */
    @GetMapping("save")
    public String save(String des,String name){
        // Capture the current instant directly. Deriving it from a LocalDateTime and the
        // system zone drops the UTC offset and can be an hour off during a DST overlap.
        Date created = new Date();
        FilmEntity filmEntity = new FilmEntity(System.currentTimeMillis(), name, des, created);
        filmRepository.save(filmEntity);
        return "success";
    }

在这里插入图片描述

查询
	/**
     * Runs the mixed Chinese / pinyin query built by {@link #structureQuery(String)}
     * and returns the content of the resulting page.
     *
     * @param name the search text
     * @return the matching films
     */
    @GetMapping("search")
    public List<FilmEntity> search(String name) {
        DisMaxQueryBuilder mixedQuery = structureQuery(name);
        SearchQuery request = new NativeSearchQueryBuilder().withQuery(mixedQuery).build();
        return filmRepository.search(request).getContent();
    }

    /**
     * Builds the combined Chinese / pinyin search query.
     * <p>
     * Three match clauses are wrapped in a dis_max query, so a document is scored by
     * its best-matching clause: {@code name} (boost 2), {@code name.pinyin}
     * (no explicit boost) and {@code director} (boost 2).
     *
     * @param content the text to search for
     * @return the dis_max query holding the three match clauses
     */
    public DisMaxQueryBuilder structureQuery(String content) {
        return QueryBuilders.disMaxQuery()
                .add(QueryBuilders.matchQuery("name", content).boost(2f))
                .add(QueryBuilders.matchQuery("name.pinyin", content))
                .add(QueryBuilders.matchQuery("director", content).boost(2f));
    }

http://localhost:8080/film/search?name=中国:

[
    {
        "id": 1559724973532,
        "name": "ceshi",
        "director": "中韩渔警冲突调查:韩警平均每天扣1艘中国渔船",
        "created": "2019-06-05T08:56:13.531+0000"
    },
    {
        "id": 1559724949646,
        "name": "测试",
        "director": "中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首",
        "created": "2019-06-05T08:55:49.645+0000"
    },
    {
        "id": 1559724960792,
        "name": "小明",
        "director": "美国留给伊拉克的是个烂摊子吗",
        "created": "2019-06-05T08:56:00.792+0000"
    }
]

http://localhost:8080/film/search?name=ceshi

http://localhost:8080/film/search?name=测试

[
    {
        "id": 1559786111119,
        "name": "ceshi",
        "director": "中韩渔警冲突调查:韩警平均每天扣1艘中国渔船",
        "created": "2019-06-06T01:55:11.119+0000"
    },
    {
        "id": 1559786123724,
        "name": "测试",
        "director": "中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首",
        "created": "2019-06-06T01:55:23.724+0000"
    }
]

构建高亮查询
	// Injected Elasticsearch transport client; the query endpoint below uses it to run the highlighted search.
	@Autowired
    TransportClient client;
	/**
     * Runs the mixed Chinese / pinyin query against index {@code new_film}
     * (type {@code new}) with highlighting on {@code name} and {@code director}.
     * For each hit, the first highlight fragment of a field, when there is one,
     * replaces that field's plain value in the returned entity.
     *
     * @param des the text to search for
     * @return the hits as entities, with matched terms wrapped in {@code <span>} tags
     */
    @GetMapping("query")
    public List<FilmEntity> query(String des) {
        QueryBuilder query = structureQuery(des);

        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.preTags("<span>");   // opening tag around each matched term
        highlightBuilder.postTags("</span>"); // closing tag around each matched term
        highlightBuilder.field("name");
        highlightBuilder.field("director");
        // NOTE(review): the author reported that also adding field("name.pinyin") had no
        // visible effect. Presumably that is because only the "name" and "director"
        // fragments are read below - verify before enabling it.

        SearchResponse response = client.prepareSearch("new_film")
                .setTypes("new")
                .setQuery(query).highlighter(highlightBuilder).execute().actionGet();

        List<FilmEntity> result = new ArrayList<>();
        for (SearchHit hit : response.getHits()) {
            // Convert the hit's source map into an entity via a JSON round trip.
            FilmEntity filmEntity = com.alibaba.fastjson.JSON.parseObject(
                    com.alibaba.fastjson.JSON.toJSONString(hit.getSourceAsMap()), FilmEntity.class);

            String highlightedName = firstHighlightFragment(hit, "name");
            if (highlightedName != null) {
                filmEntity.setName(highlightedName);
            }
            String highlightedDirector = firstHighlightFragment(hit, "director");
            if (highlightedDirector != null) {
                filmEntity.setDirector(highlightedDirector);
            }
            result.add(filmEntity);
        }
        return result;
    }

    /**
     * Returns the first highlight fragment of {@code field} on {@code hit}, or
     * {@code null} when the field has no highlight entry or no fragments.
     */
    private static String firstHighlightFragment(SearchHit hit, String field) {
        if (hit.getHighlightFields().get(field) == null) {
            return null;
        }
        Text[] fragments = hit.getHighlightFields().get(field).getFragments();
        if (fragments == null || fragments.length == 0) {
            return null;
        }
        return fragments[0].toString();
    }

http://localhost:8080/film/query?des=中国

[
    {
        "id": 1559786111119,
        "name": "ceshi",
        "director": "<span>中</span>韩渔警冲突调查:韩警平均每天扣1艘<span>中</span><span>国</span>渔船",
        "created": "2019-06-06T01:55:11.119+0000"
    },
    {
        "id": 1559786123724,
        "name": "测试",
        "director": "<span>中</span><span>国</span>驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首",
        "created": "2019-06-06T01:55:23.724+0000"
    },
    {
        "id": 1559786119620,
        "name": "小明",
        "director": "美<span>国</span>留给伊拉克的是个烂摊子吗",
        "created": "2019-06-06T01:55:19.620+0000"
    }
]

http://localhost:8080/film/query?des=测试

[
    {
        "id": 1559786123724,
        "name": "<span>测试</span>",
        "director": "中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首",
        "created": "2019-06-06T01:55:23.724+0000"
    },
    {
        "id": 1559786111119,
        "name": "ceshi",
        "director": "中韩渔警冲突调查:韩警平均每天扣1艘中国渔船",
        "created": "2019-06-06T01:55:11.119+0000"
    }
]

代码地址

版权声明:本文来源CSDN,感谢博主原创文章,遵循 CC 4.0 by-sa 版权协议,转载请附上原文出处链接和本声明。
原文链接:https://blog.csdn.net/qq_20391065/article/details/91041153
站方声明:本站部分内容来自社区用户分享,若涉及侵权,请联系站方删除。
  • 发表于 2020-03-08 17:07:54
  • 阅读 ( 1823 )
  • 分类:

0 条评论

请先 登录 后评论

官方社群

GO教程

猜你喜欢