ElasticSearch + Kibana

Kibana 运维 ElasticSearch

与之对应的Java Api

基础使用

Insert

创建索引库, 并设定 分片数量 和 副本数量
// 格式: PUT /索引库名称
# number_of_shards: number of primary shards for the index
# number_of_replicas: number of replica copies kept for each primary shard
# (comments are kept outside the body so the body stays valid JSON)
PUT /xinmeng
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 3
  }
}
给指定的索引库创建映射
// 给指定索引库的映射树添加映射字段
PUT /xinmeng/_mapping
{
  "properties":{
    "color": {
      "type": "keyword"
    }
  }
}

Delete

删除指定的索引库(普通删除)
// 格式: DELETE /索引库名称
DELETE /xinmeng

Search

查看指定索引库的映射树
// 格式: GET /索引库名称/_mapping
GET /xinmeng/_mapping

Close / Open 索引库

// 格式: POST /索引库名称/_close
POST /xinmeng/_close
// 格式: POST /索引库名称/_open
POST /xinmeng/_open

文档相关操作

Insert

添加文档, 指定文档id
/* 新增索引库数据: 
POST /索引库名称/文档名称
{
	"字段名称": "字段值",
	"字段名称": "字段值",
	"字段名称": 字段值(数字不需要双引号)
}
*/
// 若创建文档时, 不指定文档id, 则自动生成文档id, 以保证文档的唯一性
POST /xinmeng/_doc/7
{
  "name": "XinMeng",
  "age": 11,
  "sex": "girl"
}

Search

查询指定索引库指定文档id数据
// 格式: GET /索引库名称/文档名称/文档id
GET /xinmeng/_doc/3
查询指定索引库所有数据(普通查询)
// 格式: GET /索引库名称/_search
GET /xinmeng/_search
条件查询
/* 格式
GET /索引库名/_search
{
    "query":{
        "查询类型":{
            "查询条件":"查询条件值"
        }
    }
}
*/

Update

修改指定文档id的字段值
// 通过 _doc 接口写入已存在的文档id时, 是以整体覆盖方式修改(全量修改); 若只需修改部分字段, 可使用 POST /索引库名称/_update/文档id, 并在请求体的 "doc" 中给出要修改的字段
POST /xinmeng/_doc/7
{
  "name": "MengXin",
  "age": 9,
  "sex": "girl"
}

Delete

// 格式: DELETE /索引库名称/文档名称/文档id
DELETE /xinmeng/_doc/3

Create Mapping

创建指定的索引, 并创建映射树
// 创建hotel索引库, 并创建映射树
# Creates the hotel index together with its mapping.
# "brand" is mapped as keyword (not analyzed text) so that the terms aggregations
# on "brand" work; terms aggregations are disabled on text fields by default.
# name, brand and business are copied into the combined "all" field used for full-text search.
PUT /hotel
{
  "mappings": {
    "properties": {
      "id": {
        "type": "keyword"
      },
      "name": {
        "type": "text",
        "analyzer": "ik_max_word",
        "copy_to": "all"
      },
      "address": {
        "type": "text",
        "analyzer": "ik_max_word"
      },
      "price": {
        "type": "integer"
      },
      "score": {
        "type": "integer"
      },
      "brand": {
        "type": "keyword",
        "copy_to": "all"
      },
      "city": {
        "type": "keyword"
      },
      "starName": {
        "type": "keyword"
      },
      "business": {
        "type": "keyword",
        "copy_to": "all"
      },
      "location": {
        "type": "geo_point"
      },
      "pic": {
        "type": "keyword",
        "index": false
      },
      "isAD": {
        "type": "boolean"
      },
      "all": {
        "type": "text",
        "analyzer": "ik_max_word"
      }
    }
  }
}

条件检索

全文检索

查询全部
GET /hotel/_search
{
  "query": {
    "match_all": {}
  },
  "size": 1000
}
单字段查询
GET /hotel/_search
{
  "query": {
    "match": {
      "name": "如家"
    }
  },
  "size": 1000
}
多字段查询
GET /hotel/_search
{
  "query": {
    "multi_match": {
      "query": "如家",
      "fields": ["name", "brand"]
    }
  },
  "size": 1000
}

精准查询 (不会对查询条件分词)

Term (精确值查询)
GET /hotel/_search
{
  "query": {
    "term": {
      "name": {
        "value": "如家"
      }
    }
  }
}
Range (范围查询)
// gte: 大于等于
// lte: 小于等于
GET /hotel/_search
{
  "query": {
    "range": {
      "price": {
        "gte": 99,
        "lte": 399
      }
    }
  }
}

地理位置查询

圆形
GET /hotel/_search
{
  "query": {
    "geo_distance": {
      "location": "31.21, 121.5",
      "distance": "15km"
    }
  }
}
矩形
GET /hotel/_search
{
  "query": {
    "geo_bounding_box": {
      "location": {
        "top_left":{
          "lat": "31.1",
          "lon": "121.5"
        },
        "bottom_right": {
          "lat": "30.9",
          "lon": "121.7"
        }
      }
    }
  }
}

组合条件查询

查询酒店名称不为七天的酒店, 价格范围大于等于99, 小于等于199, 并且位置在以 31.21, 121.5 为圆心、半径20km的范围内; 城市为上海的酒店通过 should 优先匹配(算分更高)

GET /hotel/_search
{
  "query": {
    "bool": {
      "must_not": [
        {
          "term": {
            "name": {
              "value": "七天"
            }
          }
        }
      ],
      "must": [
        {
          "range": {
            "price": {
              "gte": 99,
              "lte": 199
            }
          }
        }
      ],
      "filter": {
        "geo_distance": {
          "distance": "20km",
          "location": {
            "lat": 31.21,
            "lon": 121.5
          }
        }
      },
      "should": [
        {
          "term": {
            "city": {
              "value": "上海"
            }
          }
        }
      ]
    }
  }
}
  • must:必须匹配每个子查询,类似“与” (and),must的条件参与算分
  • should:选择性匹配子查询,类似“或”(or)
  • must_not:必须不匹配,不参与算分,类似“非” (not)
  • filter:效果和must一样的,都是and。必须匹配,filter的条件不参与算分

搜索时,参与打分的字段越多,查询的性能也越差。因此这种多条件查询时,建议:

  • 搜索框的关键字搜索,是全文检索查询,使用must查询,参与算分
  • 其它过滤条件,采用filter查询。不参与算分

算分查询

需求: 查询如家酒店, 对于深圳的如家进行+10分

GET /hotel/_search
{
  "query": {
    "function_score": {
      "query": {
        "match": {
          "all": "如家"
        }
      },
      "functions": [
        {
          "filter": {
            "term": {
              "city": "深圳"
            }
          },
          "weight": 10
        }
      ],
      "boost_mode": "sum"
    }
  },
  "size": 30
}

function score 查询中包含四部分内容:

  • 原始查询条件:query部分,基于这个条件搜索文档,并且基于BM25算法给文档打分,原始算分(query score)
  • 过滤条件:filter部分,符合该条件的文档才会重新算分
  • 算分函数:符合filter条件的文档要根据这个函数做运算,得到的函数算分(function score),有四种函数
  • weight:函数结果是常量
  • field_value_factor:以文档中的某个字段值作为函数结果
  • random_score:以随机数作为函数结果
  • script_score:自定义算分函数算法
  • 运算模式:算分函数的结果、原始查询的相关性算分,两者之间的运算方式,包括:
  • multiply:相乘
  • replace:用function score替换query score
  • 其它,例如:sum、avg、max、min

排序查询

查找离设定经纬度坐标最近的如家酒店

GET /hotel/_search
{
  "query": {
    "match": {
      "all": "如家"
    }
  },
  "sort": [
    {
      "_geo_distance": {
        "location": "39.76,116.33",
        "order": "asc",
        "unit": "km"
      }
    }
  ],
  "from": 0,
  "size": 7
}

# 或
GET /hotel/_search
{
  "query": {
    "match": {
      "all": "如家"
    }
  },
  "sort": [
    {
      "_geo_distance": {
        "location": {
          "lat": 39.76,
          "lon": 116.33
        },
        "order": "asc",
        "unit": "km"
      }
    }
  ]
}

高亮显示

查询如家酒店, 并高亮显示为粉色

# require_field_match is false because the query runs against "all"
# while the highlighted field is "name".
GET /hotel/_search
{
  "query": {
    "match": {
      "all": "如家"
    }
  },
  "highlight": {
    "pre_tags": ["<font color='pink'>"],
    "post_tags": ["</font>"],
    "fields": {
      "name": {
        "require_field_match": false
      }
    }
  }
}

聚合查询

Bucket (桶聚合)

聚合查询: 桶聚合, 查询全国每个城市的酒店数量

GET /hotel/_search
{
  "size": 0,
  "aggs": {
    "cityAgg": {
      "terms": {
        "field": "city",
        "order": {
          "_count": "asc"
        }, 
        "size": 7
      }
    }
  }
}

查询上海的酒店,按照酒店品牌统计每一个品牌的酒店数量。 比如: 如家酒店

GET /hotel/_search
{
  "query": {
    "term": {
      "city": {
        "value": "上海"
      }
    }
  },
  "size": 0,
  "aggs": {
    "brandAggs": {
      "terms": {
        "field": "brand",
        "size": 100,
        "order": {
          "_count": "asc"
        }
      }
    }
  }
}
度量聚合

统计每个品牌酒店的用户评分的min、max、avg等值

# 需求:统计酒店的用户评分的平均分

GET hotel/_search
{
  "size": 0,
  "aggs": {
    "avgAggs": {
       "avg": {
         "field": "score"
       }
    }
  }
}

酒店的最高分

GET hotel/_search
{
  "size": 0,
  "aggs": {
    "maxAggs": {
       "max": {
         "field": "score"
       }
    }
  }
}

酒店的最低分

GET hotel/_search
{
  "size": 0,
  "aggs": {
    "minAggs": {
       "min": {
         "field": "score"
       }
    }
  }
}

stats 求出最大值、最小值、平均值…

GET hotel/_search
{
  "size": 0,
  "aggs": {
    "statsAggs": {
       "stats": {
         "field": "score"
       }
    }
  }
}
在聚合的之前加入限制条件 query

统计北京的酒店的品牌多少家?(限制条件用在桶聚合)

GET hotel/_search
{
  "query": {
    "term": {
      "city": {
        "value": "北京"
      }
    }
  },
  "size": 0, 
  "aggs": {
    "brandAgg": {
      "terms": {
        "field": "brand",
        "size": 30
      }
    }
  }
}

统计上海的酒店的用户评分的平均分?(限制条件用在度量聚合)

GET hotel/_search
{
   "query": {
    "term": {
      "city": {
        "value": "上海"
      }
    }
  },
  "size": 0,
  "aggs": {
    "scoreAvg": {
      "avg": {
        "field": "score"
      }
    }
  }
}
管道聚合(pipeline聚合)一个聚合的结果作为另一个聚合的条件

需求:统计上海的酒店的每个品牌的平均分?

  • 按每个品牌的平均分倒序排序(若不指定 order, 默认根据第一个聚合的总数倒序显示)
GET hotel/_search
{
  "query": {
    "term": {
      "city": {
        "value": "上海"
      }
    }
  },
  "size": 0,
  "aggs": {
    "brandAggs": {
      "terms": {
        "field": "brand",
        "size": 100,
        "order": {
          "avgAggs": "desc"
        }
      },
      "aggs": {
        "avgAggs": {
          "avg": {
            "field": "score"
          }
        }
      }
    }
  }
}

自动补全查询

# "text" is the content to complete; "field" is the completion field to search,
# matching the auto-completion property defined in the Java bean.
# (comments are kept outside the body so the body stays valid JSON)
GET hotel/_search
{
  "suggest": {
    "hotelSuggestion": {
      "text": "北",
      "completion": {
        "field": "suggestion",
        "skip_duplicates": true,
        "size": 10
      }
    }
  }
}

自定义分词器

// analyzer: 默认情况下,构建和搜索索引库时都使用该分词器,但如果存在search_analyzer属性后,只在构建时使用
// search_analyzer:只用在搜索索引库时
PUT /test
{
  "settings": {
    "analysis": {
      "analyzer": { 
        "my_analyzer": { 
          "tokenizer": "ik_smart",
          "filter": "py"
        }
      },
      "filter": {
        "py": { 
          "type": "pinyin",
          "keep_full_pinyin": false,
          "keep_joined_full_pinyin": true,
          "keep_original": true,
          "limit_first_letter_length": 16,
          "remove_duplicated_term": true,
          "none_chinese_pinyin_tokenize": false
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name":{
        "type": "text",
        "analyzer": "my_analyzer",
        "search_analyzer": "ik_max_word"
      }
    }
  }
}

完整示例

# 构建拼音分词器索引库
# Pinyin token filter settings used below (comments are kept outside the body so it stays valid JSON):
#   type: pinyin                              - use the pinyin token filter
#   keep_first_letter                         - emit the first letters of the pinyin
#   keep_separate_first_letter                - false: do not keep each first letter as a separate term
#   keep_full_pinyin                          - emit the full pinyin of every character
#   limit_first_letter_length                 - maximum length of the first-letter result (16)
#   keep_joined_full_pinyin                   - emit the joined full pinyin of the whole text
#   keep_none_chinese                         - keep non-Chinese letters and digits in the result
#   keep_none_chinese_together                - keep non-Chinese letters together
#   keep_none_chinese_in_first_letter         - keep non-Chinese letters in the first-letter result
#   keep_none_chinese_in_joined_full_pinyin   - keep non-Chinese letters in the joined full pinyin
#   none_chinese_pinyin_tokenize              - split non-Chinese letters into separate pinyin terms when they are pinyin
#   keep_original                             - also keep the original input
#   lowercase                                 - lowercase non-Chinese letters
#   remove_duplicated_term                    - when true, duplicated terms are removed to save index space (false here)
#   ignore_pinyin_offset                      - allow overlapping tokens by ignoring offsets
PUT /maomao_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "maomao_pinyin_analyzer": {
          "tokenizer": "ik_max_word",
          "filter": "pinyin"
        }
      },
      "filter": {
        "pinyin": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_separate_first_letter": false,
          "keep_full_pinyin": true,
          "limit_first_letter_length": 16,
          "keep_joined_full_pinyin": true,
          "keep_none_chinese": true,
          "keep_none_chinese_together": true,
          "keep_none_chinese_in_first_letter": true,
          "keep_none_chinese_in_joined_full_pinyin": true,
          "none_chinese_pinyin_tokenize": true,
          "keep_original": true,
          "lowercase": true,
          "trim_whitespace": true,
          "remove_duplicated_term": false,
          "ignore_pinyin_offset": true
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": {
        "type": "keyword",
        "index": true
      },
      "name": {
        "type": "text",
        "analyzer": "maomao_pinyin_analyzer",
        "search_analyzer": "ik_max_word",
        "copy_to": "maomaoAll",
        "index": true
      },
      "address": {
        "type": "text",
        "analyzer": "maomao_pinyin_analyzer",
        "search_analyzer": "ik_max_word",
        "copy_to": "maomaoAll",
        "index": true
      },
      "price": {
        "type": "integer",
        "index": true
      },
      "score": {
        "type": "integer",
        "index": true
      },
      "brand": {
        "type": "keyword",
        "copy_to": "maomaoAll",
        "index": true
      },
      "city": {
        "type": "keyword",
        "index": true
      },
      "starName": {
        "type": "keyword",
        "index": true
      },
      "business": {
        "type": "text",
        "analyzer": "maomao_pinyin_analyzer",
        "search_analyzer": "ik_max_word",
        "copy_to": "maomaoAll",
        "index": true
      },
      "pic": {
        "type": "keyword",
        "index": false
      },
      "isAD": {
        "type": "keyword",
        "index": true
      },
      "location": {
        "type": "geo_point",
        "index": true
      },
      "maomaoAll": {
        "type": "text",
        "analyzer": "maomao_pinyin_analyzer",
        "search_analyzer": "ik_max_word",
        "index": true
      }
    }
  }
}

自定义分词器官方地址(GitHub)

  • 可选参数
    – keep_first_letter启用此选项时,保留拼音首字母,例如:刘德华 > ldh,默认值:true
    – keep_separate_first_letter启用此选项时,将分别保留首字母,默认值:false
    – limit_first_letter_length设置first_letter结果的最大长度,默认值:16
    – keep_full_pinyin当启用此选项时,例如:刘德华 > [liu, de, hua],默认值:true
    – keep_joined_full_pinyin当启用此选项时,例如:刘德华 > [liudehua],默认值:false
    – keep_none_chinese在结果中保留非中文字母或数字,默认值:true
    – keep_none_chinese_together将非中文字母放在一起,默认值:true
    – keep_none_chinese_in_first_letter在第一个字母中保留非中文字母
    – keep_none_chinese_in_joined_full_pinyin将非中文字母保留在连接的完整拼音中
    – none_chinese_pinyin_tokenize如果非中文字母是拼音,则将其拆分为单独的拼音术语,默认值:true
    – keep_original启用此选项时,也会保留原始输入,默认值:false, 使用此属性时, 应该设置为true, 以保留原始值
    – lowercase小写非中文字母,默认值:true
    – trim_whitespace默认值:true
    – remove_duplicated_term启用此选项时,将删除重复的术语以保存索引,默认值:false
    – ignore_pinyin_offset6.0之后,偏移量受到严格约束,不允许重叠标记,使用此参数,重叠标记将允许忽略偏移量,请注意,所有与位置相关的查询或突出显示都会变得不正确,您应该使用多个字段并指定不同的设置,用于不同的查询目的。如果您需要偏移量,请将其设置为 false。默认值:true。

整合示例

DSL

PUT /hotel
{
  "settings": {
    "analysis": {
      "analyzer": {
        "text_anlyzer": {
          "tokenizer": "ik_max_word",
          "filter": "py"
        }
      },
      "filter": {
        "py": {
          "type": "pinyin",
          "keep_full_pinyin": false,
          "keep_joined_full_pinyin": true,
          "keep_original": true,
          "limit_first_letter_length": 16,
          "remove_duplicated_term": true,
          "none_chinese_pinyin_tokenize": false
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id":{
        "type": "keyword"
      },
      "name":{
        "type": "text",
        "analyzer": "text_anlyzer",
        "search_analyzer": "ik_max_word",
        "copy_to": "all"
      },
      "address":{
        "type": "keyword",
        "index": false
      },
      "price":{
        "type": "integer"
      },
      "score":{
        "type": "integer"
      },
      "brand":{
        "type": "keyword",
        "copy_to": "all"
      },
      "city":{
        "type": "keyword"
      },
      "starName":{
        "type": "keyword"
      },
      "business":{
        "type": "keyword",
        "copy_to": "all"
      },
      "location":{
        "type": "geo_point"
      },
      "pic":{
        "type": "keyword",
        "index": false
      },
      "all":{
        "type": "text",
        "analyzer": "text_anlyzer",
        "search_analyzer": "ik_max_word"
      },
	  "suggestion":{
        "type": "completion",
        "analyzer": "text_anlyzer",
		"search_analyzer": "ik_max_word"
      }
    }
  }
}

Java Bean

/**
 * Document object written to the hotel index, built from a {@code Hotel} record.
 *
 * <p>Besides copying the plain hotel fields, the constructor derives two values:
 * {@code location}, a "latitude, longitude" string, and {@code suggestion}, the
 * list of inputs for the auto-completion field.
 */
@Data
@NoArgsConstructor
public class HotelDoc {

    private Long id;

    private String name;

    private String address;

    private Integer price;

    private Integer score;

    private String brand;

    private String city;

    private String starName;

    private String business;

    private String pic;

    // Not populated by the constructor.
    private String all;

    // Geo point serialized as "latitude, longitude".
    private String location;

    private Boolean isAD;

    // Distance value; not populated by the constructor
    // (presumably filled in from geo-distance sort results - confirm against caller).
    private Object distance;

    // Inputs for the auto-completion field.
    private List<String> suggestion;

    /**
     * Builds the index document from a hotel record.
     *
     * @param hotel source record; its brand, city and business feed the
     *              auto-completion inputs
     */
    public HotelDoc(Hotel hotel) {
        this.id = hotel.getId();
        this.name = hotel.getName();
        this.address = hotel.getAddress();
        this.price = hotel.getPrice();
        this.score = hotel.getScore();
        this.brand = hotel.getBrand();
        this.city = hotel.getCity();
        this.starName = hotel.getStarName();
        this.business = hotel.getBusiness();
        this.pic = hotel.getPic();
        this.isAD = hotel.getIsAD();
        this.location = hotel.getLatitude() + ", " + hotel.getLongitude();

        // Fill the suggestion field from brand, city and business.
        this.suggestion = new ArrayList<>();
        addSuggestion(hotel.getBrand());
        addSuggestion(hotel.getCity());
        addSuggestion(hotel.getBusiness());
    }

    /**
     * Adds one auto-completion input, skipping null or empty values.
     * NOTE(review): Elasticsearch completion fields are expected to reject null
     * inputs, which would fail indexing of the whole document - confirm.
     */
    private void addSuggestion(String candidate) {
        if (candidate != null && !candidate.isEmpty()) {
            suggestion.add(candidate);
        }
    }
}

测试分词器

GET _analyze
{
  "text": "我有我的快乐和自由",
  "analyzer": "ik_max_word"
}