ElasticSearch + Kibana
Kibana 运维 ElasticSearch
基础使用
Insert
创建索引库, 并设定 分片数量 和 副本数量
// 格式: PUT /索引库名称
PUT /xinmeng
{
"settings": {
# 设定分片数量
"number_of_shards": 3,
# 设定副本数量
"number_of_replicas": 3
}
}
给指定的索引库创建映射
// 给指定索引库的映射树添加映射字段
PUT /xinmeng/_mapping
{
"properties":{
"color": {
"type": "keyword"
}
}
}
Delete
删除指定的索引库(普通删除)
// 格式: DELETE /索引库名称
DELETE /xinmeng
Search
查看指定索引库的映射树
// 格式: GET /索引库名称/_mapping
GET /xinmeng/_mapping
Close / Open 索引库
// 格式: POST /索引库名称/_close
POST /xinmeng/_close
// 格式: POST /索引库名称/_open
POST /xinmeng/_open
文档相关操作
Insert
添加文档, 指定文档id
/* 新增索引库数据:
POST /索引库名称/文档名称
{
"字段名称": "字段值",
"字段名称": "字段值",
"字段名称": 字段值(数字不需要双引号)
}
*/
// 若创建文档时, 不指定文档id, 则自动生成文档id, 以保证文档的唯一性
# Index a document with the explicit id 7 into the xinmeng index.
POST /xinmeng/_doc/7
{
  "name": "XinMeng",
  "age": 11,
  "sex": "girl"
}
Search
查询指定索引库指定文档id数据
// 格式: GET /索引库名称/文档名称/文档id
GET /xinmeng/_doc/3
查询指定索引库所有数据(普通查询)
// 格式: GET /索引库名称/_search
GET /xinmeng/_search
条件查询
/* 格式
GET /索引库名/_search
{
"query":{
"查询类型":{
"查询条件":"查询条件值"
}
}
}
*/
Update
修改指定文档id的字段值
// 以 POST /索引库名称/_doc/文档id 方式修改时, 是整体覆盖(全量替换)文档; 若只想单独修改某一字段的值, 需改用 POST /索引库名称/_update/文档id, 并把要修改的字段放在 "doc" 中
# Re-index document 7 with a complete body; the stored document is replaced as a whole.
POST /xinmeng/_doc/7
{
  "name": "MengXin",
  "age": 9,
  "sex": "girl"
}
Delete
// 格式: DELETE /索引库名称/文档名称/文档id
DELETE /xinmeng/_doc/3
Create Mapping
创建指定的索引, 并创建映射树
// 创建hotel索引库, 并创建映射树
# Create the hotel index together with its field mappings.
# - name / address / all are analyzed text fields (ik_max_word).
# - name, brand and business are copied into the combined "all" field.
# - brand is a keyword (not analyzed text) so that the terms aggregations
#   on "brand" used by the aggregation queries in these notes are allowed;
#   text fields reject terms aggregations unless fielddata is enabled.
# - pic is stored but not indexed ("index": false), so it cannot be searched.
PUT /hotel
{
  "mappings": {
    "properties": {
      "id": {
        "type": "keyword"
      },
      "name": {
        "type": "text",
        "analyzer": "ik_max_word",
        "copy_to": "all"
      },
      "address": {
        "type": "text",
        "analyzer": "ik_max_word"
      },
      "price": {
        "type": "integer"
      },
      "score": {
        "type": "integer"
      },
      "brand": {
        "type": "keyword",
        "copy_to": "all"
      },
      "city": {
        "type": "keyword"
      },
      "starName": {
        "type": "keyword"
      },
      "business": {
        "type": "keyword",
        "copy_to": "all"
      },
      "location": {
        "type": "geo_point"
      },
      "pic": {
        "type": "keyword",
        "index": false
      },
      "isAD": {
        "type": "boolean"
      },
      "all": {
        "type": "text",
        "analyzer": "ik_max_word"
      }
    }
  }
}
条件检索
全文检索
查询全部
GET /hotel/_search
{
"query": {
"match_all": {}
},
"size": 1000
}
单字段查询
GET /hotel/_search
{
"query": {
"match": {
"name": "如家"
}
},
"size": 1000
}
多字段查询
GET /hotel/_search
{
"query": {
"multi_match": {
"query": "如家",
"fields": ["name", "brand"]
}
},
"size": 1000
}
精准查询 (不会对查询条件分词)
Term (精确值查询)
GET /hotel/_search
{
"query": {
"term": {
"name": {
"value": "如家"
}
}
}
}
Range (范围查询)
// gte: 大于等于
// lte: 小于等于
GET /hotel/_search
{
"query": {
"range": {
"price": {
"gte": 99,
"lte": 399
}
}
}
}
地理位置查询
圆形
GET /hotel/_search
{
"query": {
"geo_distance": {
"location": "31.21, 121.5",
"distance": "15km"
}
}
}
矩形
GET /hotel/_search
{
"query": {
"geo_bounding_box": {
"location": {
"top_left":{
"lat": "31.1",
"lon": "121.5"
},
"bottom_right": {
"lat": "30.9",
"lon": "121.7"
}
}
}
}
}
组合条件查询
查询酒店名称不为七天、价格大于等于99且小于等于199、位于坐标 (31.21, 121.5) 半径20km范围内的酒店; 城市为上海是 should 条件, 只影响算分, 不强制过滤
GET /hotel/_search
{
"query": {
"bool": {
"must_not": [
{
"term": {
"name": {
"value": "七天"
}
}
}
],
"must": [
{
"range": {
"price": {
"gte": 99,
"lte": 199
}
}
}
],
"filter": {
"geo_distance": {
"distance": "20km",
"location": {
"lat": 31.21,
"lon": 121.5
}
}
},
"should": [
{
"term": {
"city": {
"value": "上海"
}
}
}
]
}
}
}
- must:必须匹配每个子查询,类似“与” (and),must的条件参与算分
- should:选择性匹配子查询,类似“或”(or)
- must_not:必须不匹配,不参与算分,类似“非” (not)
- filter:效果和must一样的,都是and。必须匹配,filter的条件不参与算分
搜索时,参与打分的字段越多,查询的性能也越差。因此这种多条件查询时,建议:
- 搜索框的关键字搜索,是全文检索查询,使用must查询,参与算分
- 其它过滤条件,采用filter查询。不参与算分
算分查询
需求: 查询如家酒店, 对于深圳的如家进行+10分
GET /hotel/_search
{
"query": {
"function_score": {
"query": {
"match": {
"all": "如家"
}
},
"functions": [
{
"filter": {
"term": {
"city": "深圳"
}
},
"weight": 10
}
],
"boost_mode": "sum"
}
},
"size": 30
}
function score 查询中包含四部分内容:
- 原始查询条件:query部分,基于这个条件搜索文档,并且基于BM25算法给文档打分,原始算分(query score)
- 过滤条件:filter部分,符合该条件的文档才会重新算分
- 算分函数:符合filter条件的文档要根据这个函数做运算,得到的函数算分(function score),有四种函数
- weight:函数结果是常量
- field_value_factor:以文档中的某个字段值作为函数结果
- random_score:以随机数作为函数结果
- script_score:自定义算分函数算法
- 运算模式:算分函数的结果、原始查询的相关性算分,两者之间的运算方式,包括:
- multiply:相乘
- replace:用function score替换query score
- 其它,例如:sum、avg、max、min
排序查询
查找离设定经纬度坐标最近的如家酒店
GET /hotel/_search
{
"query": {
"match": {
"all": "如家"
}
},
"sort": [
{
"_geo_distance": {
"location": "39.76,116.33",
"order": "asc",
"unit": "km"
}
}
],
"from": 0,
"size": 7
}
# 或
GET /hotel/_search
{
"query": {
"match": {
"all": "如家"
}
},
"sort": [
{
"_geo_distance": {
"location": {
"lat": 39.76,
"lon": 116.33
},
"order": "asc",
"unit": "km"
}
}
]
}
高亮显示
查询如家酒店, 并高亮显示为粉色
# Search the combined "all" field for "如家" and highlight hits in the "name" field.
# require_field_match is false so that "name" is highlighted even though the
# query itself targets "all" rather than "name".
GET /hotel/_search
{
  "query": {
    "match": {
      "all": "如家"
    }
  },
  "highlight": {
    "fields": {
      "name": {
        "require_field_match": false
      }
    },
    "pre_tags": "<font color='pink'>",
    "post_tags": "</font>"
  }
}
聚合查询
Bucket (桶聚合)
聚合查询: 桶聚合, 查询全国每个城市的酒店数量
GET /hotel/_search
{
"size": 0,
"aggs": {
"cityAgg": {
"terms": {
"field": "city",
"order": {
"_count": "asc"
},
"size": 7
}
}
}
}
查询上海的酒店,按照酒店品牌统计每一个品牌的酒店数量。 比如: 如家酒店
GET /hotel/_search
{
"query": {
"term": {
"city": {
"value": "上海"
}
}
},
"size": 0,
"aggs": {
"brandAggs": {
"terms": {
"field": "brand",
"size": 100,
"order": {
"_count": "asc"
}
}
}
}
}
度量聚合
统计每个品牌酒店的用户评分的min、max、avg等值
# 需求:统计酒店的用户评分的平均分
GET hotel/_search
{
"size": 0,
"aggs": {
"avgAggs": {
"avg": {
"field": "score"
}
}
}
}
酒店的最高分
GET hotel/_search
{
"size": 0,
"aggs": {
"maxAggs": {
"max": {
"field": "score"
}
}
}
}
酒店的最低分
GET hotel/_search
{
"size": 0,
"aggs": {
"minAggs": {
"min": {
"field": "score"
}
}
}
}
stats 求出最大值、最小值、平均值…
GET hotel/_search
{
"size": 0,
"aggs": {
"statsAggs": {
"stats": {
"field": "score"
}
}
}
}
在聚合的之前加入限制条件 query
统计北京的酒店的品牌多少家?(限制条件用在桶聚合)
GET hotel/_search
{
"query": {
"term": {
"city": {
"value": "北京"
}
}
},
"size": 0,
"aggs": {
"brandAgg": {
"terms": {
"field": "brand",
"size": 30
}
}
}
}
统计上海的酒店的用户评分的平均分?(限制条件用在度量聚合)
GET hotel/_search
{
"query": {
"term": {
"city": {
"value": "上海"
}
}
},
"size": 0,
"aggs": {
"scoreAvg": {
"avg": {
"field": "score"
}
}
}
}
管道聚合(pipeline聚合)一个聚合的结果作为另一个聚合的条件
需求:统计上海的酒店的每个品牌的平均分?
- 默认排序是根据桶内文档总数(_count)倒序显示; 下面的示例通过 order 改为按平均分(avgAggs)倒序
GET hotel/_search
{
"query": {
"term": {
"city": {
"value": "上海"
}
}
},
"size": 0,
"aggs": {
"brandAggs": {
"terms": {
"field": "brand",
"size": 100,
"order": {
"avgAggs": "desc"
}
},
"aggs": {
"avgAggs": {
"avg": {
"field": "score"
}
}
}
}
}
}
自动补全查询
GET hotel/_search
{
"suggest": {
"hotelSuggestion": {
"text": "北", // 查询内容
"completion":{
"field":"suggestion", //自动补全字段, 对应JavaBean中定义的自动补全属性
"skip_duplicates":true,
"size":10
}
}
}
}
自定义分词器
// analyzer: 默认情况下,构建和搜索索引库时都使用该分词器,但如果存在search_analyzer属性后,只在构建时使用
// search_analyzer:只用在搜索索引库时
PUT /test
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "ik_smart",
"filter": "py"
}
},
"filter": {
"py": {
"type": "pinyin",
"keep_full_pinyin": false,
"keep_joined_full_pinyin": true,
"keep_original": true,
"limit_first_letter_length": 16,
"remove_duplicated_term": true,
"none_chinese_pinyin_tokenize": false
}
}
}
},
"mappings": {
"properties": {
"name":{
"type": "text",
"analyzer": "my_analyzer",
"search_analyzer": "ik_max_word"
}
}
}
}
完整示例
# 构建拼音分词器索引库
# Create maomao_index with a custom analyzer that tokenizes with ik_max_word
# and then expands every token through a pinyin token filter.
# Text fields index with maomao_pinyin_analyzer but search with plain
# ik_max_word (search_analyzer), and are copied into the combined
# "maomaoAll" field.
PUT /maomao_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "maomao_pinyin_analyzer": {
          "tokenizer": "ik_max_word",
          "filter": "pinyin"
        }
      },
      "filter": {
        "pinyin": {
          # Use the pinyin token filter type.
          "type": "pinyin",
          # Keep the first-letter abbreviation of the text (e.g. 刘德华 -> ldh).
          "keep_first_letter": true,
          # Do not additionally keep each first letter as a separate term.
          "keep_separate_first_letter": false,
          # Keep the full pinyin of every character.
          "keep_full_pinyin": true,
          # Maximum length of the first-letter result.
          "limit_first_letter_length": 16,
          # Keep the joined full pinyin of the whole text.
          "keep_joined_full_pinyin": true,
          # Keep non-Chinese letters and digits in the result.
          "keep_none_chinese": true,
          # Keep non-Chinese letters together.
          "keep_none_chinese_together": true,
          # Keep non-Chinese letters in the first-letter result.
          "keep_none_chinese_in_first_letter": true,
          # Keep non-Chinese letters in the joined full pinyin.
          "keep_none_chinese_in_joined_full_pinyin": true,
          # Split non-Chinese letters into separate pinyin terms when they are pinyin.
          "none_chinese_pinyin_tokenize": true,
          # Also keep the original input.
          "keep_original": true,
          # Lowercase non-Chinese letters.
          "lowercase": true,
          "trim_whitespace": true,
          # When enabled, duplicated terms are removed to save index space; disabled here.
          "remove_duplicated_term": false,
          # Allow overlapping tokens by ignoring offsets.
          "ignore_pinyin_offset": true
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": {
        "type": "keyword",
        "index": true
      },
      "name": {
        "type": "text",
        "analyzer": "maomao_pinyin_analyzer",
        "search_analyzer": "ik_max_word",
        "copy_to": "maomaoAll",
        "index": true
      },
      "address": {
        "type": "text",
        "analyzer": "maomao_pinyin_analyzer",
        "search_analyzer": "ik_max_word",
        "copy_to": "maomaoAll",
        "index": true
      },
      "price": {
        "type": "integer",
        "index": true
      },
      "score": {
        "type": "integer",
        "index": true
      },
      "brand": {
        "type": "keyword",
        "copy_to": "maomaoAll",
        "index": true
      },
      "city": {
        "type": "keyword",
        "index": true
      },
      "starName": {
        "type": "keyword",
        "index": true
      },
      "business": {
        "type": "text",
        "analyzer": "maomao_pinyin_analyzer",
        "search_analyzer": "ik_max_word",
        "copy_to": "maomaoAll",
        "index": true
      },
      "pic": {
        "type": "keyword",
        "index": false
      },
      "isAD": {
        "type": "keyword",
        "index": true
      },
      "location": {
        "type": "geo_point",
        "index": true
      },
      "maomaoAll": {
        "type": "text",
        "analyzer": "maomao_pinyin_analyzer",
        "search_analyzer": "ik_max_word",
        "index": true
      }
    }
  }
}
- 可选参数
– keep_first_letter:启用此选项时保留拼音首字母,例如:刘德华 > ldh,默认值:true
– keep_separate_first_letter启用此选项时,将分别保留首字母,默认值:false
– limit_first_letter_length设置first_letter结果的最大长度,默认值:16
– keep_full_pinyin:启用此选项时保留每个字的全拼,例如:刘德华 > [liu, de, hua],默认值:true
– keep_joined_full_pinyin:启用此选项时保留连接在一起的全拼,例如:刘德华 > [liudehua],默认值:false
– keep_none_chinese在结果中保留非中文字母或数字,默认值:true
– keep_none_chinese_together将非中文字母放在一起,默认值:true
– keep_none_chinese_in_first_letter在第一个字母中保留非中文字母
– keep_none_chinese_in_joined_full_pinyin将非中文字母保留在连接的完整拼音中
– none_chinese_pinyin_tokenize如果非中文字母是拼音,则将其拆分为单独的拼音术语,默认值:true
– keep_original启用此选项时,也会保留原始输入,默认值:false, 使用此属性时, 应该设置为true, 要保留原始值
– lowercase小写非中文字母,默认值:true
– trim_whitespace默认值:true
– remove_duplicated_term启用此选项时,将删除重复的术语以保存索引,默认值:false
– ignore_pinyin_offset6.0之后,偏移量受到严格约束,不允许重叠标记,使用此参数,重叠标记将允许忽略偏移量,请注意,所有与位置相关的查询或突出显示都会变得不正确,您应该使用多个字段并指定不同的设置,用于不同的查询目的。如果您需要偏移量,请将其设置为 false。默认值:true。
整合示例
DSL
PUT /hotel
{
"settings": {
"analysis": {
"analyzer": {
"text_anlyzer": {
"tokenizer": "ik_max_word",
"filter": "py"
}
},
"filter": {
"py": {
"type": "pinyin",
"keep_full_pinyin": false,
"keep_joined_full_pinyin": true,
"keep_original": true,
"limit_first_letter_length": 16,
"remove_duplicated_term": true,
"none_chinese_pinyin_tokenize": false
}
}
}
},
"mappings": {
"properties": {
"id":{
"type": "keyword"
},
"name":{
"type": "text",
"analyzer": "text_anlyzer",
"search_analyzer": "ik_max_word",
"copy_to": "all"
},
"address":{
"type": "keyword",
"index": false
},
"price":{
"type": "integer"
},
"score":{
"type": "integer"
},
"brand":{
"type": "keyword",
"copy_to": "all"
},
"city":{
"type": "keyword"
},
"starName":{
"type": "keyword"
},
"business":{
"type": "keyword",
"copy_to": "all"
},
"location":{
"type": "geo_point"
},
"pic":{
"type": "keyword",
"index": false
},
"all":{
"type": "text",
"analyzer": "text_anlyzer",
"search_analyzer": "ik_max_word"
},
"suggestion":{
"type": "completion",
"analyzer": "text_anlyzer",
"search_analyzer": "ik_max_word"
}
}
}
}
Java Bean
/**
 * Search-side representation of a hotel, populated from a {@code Hotel}.
 *
 * <p>Accessors and the no-argument constructor are generated by Lombok
 * ({@code @Data}, {@code @NoArgsConstructor}). The {@code all} and
 * {@code distance} fields are not assigned by the {@code Hotel}-based
 * constructor.
 */
@Data
@NoArgsConstructor
public class HotelDoc {
    private Long id;
    private String name;
    private String address;
    private Integer price;
    private Integer score;
    private String brand;
    private String city;
    private String starName;
    private String business;
    private String pic;
    private String all;
    /** Coordinates rendered as the string {@code "<latitude>, <longitude>"}. */
    private String location;
    private Boolean isAD;
    /** Distance value. */
    private Object distance;
    /** Values used for the auto-completion field. */
    private List<String> suggestion;

    /**
     * Copies the searchable attributes of {@code hotel} into a new document.
     *
     * @param hotel source entity whose getters supply every copied value
     */
    public HotelDoc(Hotel hotel) {
        // Auto-completion candidates, in order: brand, city, business.
        List<String> completions = new ArrayList<>();
        completions.add(hotel.getBrand());
        completions.add(hotel.getCity());
        completions.add(hotel.getBusiness());
        this.suggestion = completions;

        // Latitude first, then longitude, separated by ", ".
        this.location = hotel.getLatitude() + ", " + hotel.getLongitude();

        // Plain one-to-one copies.
        this.isAD = hotel.getIsAD();
        this.pic = hotel.getPic();
        this.business = hotel.getBusiness();
        this.starName = hotel.getStarName();
        this.city = hotel.getCity();
        this.brand = hotel.getBrand();
        this.score = hotel.getScore();
        this.price = hotel.getPrice();
        this.address = hotel.getAddress();
        this.name = hotel.getName();
        this.id = hotel.getId();
    }
}
测试分词器
GET _analyze
{
"text": "我有我的快乐和自由",
"analyzer": "ik_max_word"
}