ElasticSearch + Kibana

Kibana 运维 ElasticSearch

基础使用

Insert

创建索引库, 并设定分片数量和副本数量

// Format: PUT /<index name>
// Creates the index and sets its shard and replica counts.
PUT /xinmeng
{
  "settings": {
    # Number of primary shards
    "number_of_shards": 3,
    # Number of replicas
    "number_of_replicas": 3
  }
}

给指定的索引库创建映射

// Add a field ("color", type keyword) to the mapping of the given index
PUT /xinmeng/_mapping
{
  "properties":{
    "color": {
      "type": "keyword"
    }
  }
}

Delete

删除指定的索引库(普通删除)

// Format: DELETE /<index name>
DELETE /xinmeng

Search

查看指定索引库的映射树

// Format: GET /<index name>/_mapping
GET /xinmeng/_mapping

Close / Open 索引库

// Format: POST /<index name>/_close
POST /xinmeng/_close

// Format: POST /<index name>/_open
POST /xinmeng/_open

文档相关操作

Insert

添加文档, 指定文档id

/* Index (add) a document:
POST /<index name>/_doc/<document id>
{
  "<field name>": "<string value>",
  "<field name>": "<string value>",
  "<field name>": <numeric value, written without double quotes>
}
*/
// If the document id is left out (POST /<index name>/_doc), a unique id is generated automatically
POST /xinmeng/_doc/7
{
  "name": "XinMeng",
  "age": 11,
  "sex": "girl"
}

Search

查询指定索引库指定文档id数据

// Format: GET /<index name>/_doc/<document id>
GET /xinmeng/_doc/3

查询指定索引库所有数据(普通查询)

// Format: GET /<index name>/_search
GET /xinmeng/_search

条件查询

/* Format
GET /<index name>/_search
{
    "query":{
        "<query type>":{
            "<query condition>":"<condition value>"
        }
    }
}
*/

Update

修改指定文档id的字段值

// Indexing again with an existing id replaces the whole document, so every field has to be sent again.
// To change only some fields, use the update API instead:
//   POST /<index name>/_update/<document id>  with a body of { "doc": { "<field name>": <new value> } }
POST /xinmeng/_doc/7
{
  "name": "MengXin",
  "age": 9,
  "sex": "girl"
}

Delete

// Format: DELETE /<index name>/_doc/<document id>
DELETE /xinmeng/_doc/3

Create Mapping

创建指定的索引, 并创建映射树

// Create the hotel index together with its field mappings.
// name and address are analyzed with ik_max_word; name, brand and business are also copied into the combined "all" field.
// brand is a keyword (not analyzed text) because the aggregation examples run terms aggregations on it,
// which an analyzed text field does not support by default.
// pic has "index": false.
PUT /hotel
{
  "mappings": {
    "properties": {
      "id": {
        "type": "keyword"
      },
      "name": {
        "type": "text",
        "analyzer": "ik_max_word",
        "copy_to": "all"
      },
      "address": {
        "type": "text",
        "analyzer": "ik_max_word"
      },
      "price": {
        "type": "integer"
      },
      "score": {
        "type": "integer"
      },
      "brand": {
        "type": "keyword",
        "copy_to": "all"
      },
      "city": {
        "type": "keyword"
      },
      "starName": {
        "type": "keyword"
      },
      "business": {
        "type": "keyword",
        "copy_to": "all"
      },
      "location": {
        "type": "geo_point"
      },
      "pic": {
        "type": "keyword",
        "index": false
      },
      "isAD": {
        "type": "boolean"
      },
      "all": {
        "type": "text",
        "analyzer": "ik_max_word"
      }
    }
  }
}

条件检索

全文检索

查询全部

# Match every document in the hotel index; return at most 1000 hits
GET /hotel/_search
{
  "query": {
    "match_all": {}
  },
  "size": 1000
}

单字段查询

# Full-text match on a single field (name); return at most 1000 hits
GET /hotel/_search
{
  "query": {
    "match": {
      "name": "如家"
    }
  },
  "size": 1000
}

多字段查询

# Full-text match of the same text against several fields (name and brand)
GET /hotel/_search
{
  "query": {
    "multi_match": {
      "query": "如家",
      "fields": ["name", "brand"]
    }
  },
  "size": 1000
}

精准查询 (不会对查询条件分词)

Term (精确值查询)

# Exact-value (term) query on name.
# NOTE(review): name is mapped as analyzed text in the hotel mappings in these notes, so this
# presumably only matches when the analyzer produced exactly this token — confirm.
GET /hotel/_search
{
  "query": {
    "term": {
      "name": {
        "value": "如家"
      }
    }
  }
}

Range (范围查询)

// gte: greater than or equal to
// lte: less than or equal to
GET /hotel/_search
{
  "query": {
    "range": {
      "price": {
        "gte": 99,
        "lte": 399
      }
    }
  }
}

地理位置查询

圆形

# Circle search: hotels whose location is within 15km of the point "31.21, 121.5"
GET /hotel/_search
{
  "query": {
    "geo_distance": {
      "location": "31.21, 121.5",
      "distance": "15km"
    }
  }
}

矩形

# Rectangle search: hotels whose location lies inside the box spanned by the
# top_left and bottom_right corners. Coordinates are written as numbers, matching
# the lat/lon object used by the geo_distance filter in the bool example of these notes.
GET /hotel/_search
{
  "query": {
    "geo_bounding_box": {
      "location": {
        "top_left": {
          "lat": 31.1,
          "lon": 121.5
        },
        "bottom_right": {
          "lat": 30.9,
          "lon": 121.7
        }
      }
    }
  }
}

组合条件查询

查询上海所有酒店名称不为七天的酒店, 价格范围大于等于99, 小于等于199, 并且位于坐标 31.21, 121.5 半径20km范围内

# Combined (bool) query:
#   must_not - name must not be the term 七天
#   must     - price between 99 and 199 inclusive
#   filter   - location within 20km of lat 31.21 / lon 121.5
#   should   - city is 上海
# When must or filter clauses are present, should clauses are optional by default and only
# influence the score. "minimum_should_match": 1 makes the city condition mandatory, so the
# result is really limited to hotels in 上海.
GET /hotel/_search
{
  "query": {
    "bool": {
      "must_not": [
        {
          "term": {
            "name": {
              "value": "七天"
            }
          }
        }
      ],
      "must": [
        {
          "range": {
            "price": {
              "gte": 99,
              "lte": 199
            }
          }
        }
      ],
      "filter": {
        "geo_distance": {
          "distance": "20km",
          "location": {
            "lat": 31.21,
            "lon": 121.5
          }
        }
      },
      "should": [
        {
          "term": {
            "city": {
              "value": "上海"
            }
          }
        }
      ],
      "minimum_should_match": 1
    }
  }
}

must：必须匹配每个子查询，类似“与” (and)，must的条件参与算分
should：选择性匹配子查询，类似“或”(or)
must_not：必须不匹配，不参与算分，类似“非” (not)
filter：效果和must一样的，都是and。必须匹配，filter的条件不参与算分

搜索时，参与打分的字段越多，查询的性能也越差。因此这种多条件查询时，建议：

搜索框的关键字搜索，是全文检索查询，使用must查询，参与算分
其它过滤条件，采用filter查询。不参与算分

算分查询

需求：查询如家酒店，对于深圳的如家进行+10分

# function_score: the inner query matches 如家 on the "all" field; hits that also pass the
# filter (city is 深圳) get the weight function result 10, which boost_mode "sum" adds to
# the original query score. Returns at most 30 hits.
GET /hotel/_search
{
  "query": {
    "function_score": {
      "query": {
        "match": {
          "all": "如家"
        }
      },
      "functions": [
        {
          "filter": {
            "term": {
              "city": "深圳"
            }
          },
          "weight": 10
        }
      ],
      "boost_mode": "sum"
    }
  },
  "size": 30
}

function score 查询中包含四部分内容：

原始查询条件：query部分，基于这个条件搜索文档，并且基于BM25算法给文档打分，原始算分（query score)
过滤条件：filter部分，符合该条件的文档才会重新算分
算分函数：符合filter条件的文档要根据这个函数做运算，得到的函数算分（function score），有四种函数
weight：函数结果是常量
field_value_factor：以文档中的某个字段值作为函数结果
random_score：以随机数作为函数结果
script_score：自定义算分函数算法
运算模式：算分函数的结果、原始查询的相关性算分，两者之间的运算方式，包括：
multiply：相乘
replace：用function score替换query score
其它，例如：sum、avg、max、min

排序查询

查找离设定经纬度坐标最近的如家酒店

# Match 如家 on "all" and sort by distance from the point "39.76,116.33", nearest first,
# with the distance unit set to km. Returns the first 7 hits (from 0, size 7).
GET /hotel/_search
{
  "query": {
    "match": {
      "all": "如家"
    }
  },
  "sort": [
    {
      "_geo_distance": {
        "location": "39.76,116.33",
        "order": "asc",
        "unit": "km"
      }
    }
  ],
  "from": 0,
  "size": 7
}

# Or: the same distance sort with the reference point given as a lat/lon object
GET /hotel/_search
{
  "query": {
    "match": {
      "all": "如家"
    }
  },
  "sort": [
    {
      "_geo_distance": {
        "location": {
          "lat": 39.76,
          "lon": 116.33
        },
        "order": "asc",
        "unit": "km"
      }
    }
  ]
}

高亮显示

查询如家酒店, 并高亮显示为粉色

# Match 如家 on the "all" field and highlight the hits inside the name field.
# The query targets "all" while the highlighted field is "name", so require_field_match
# is switched off; it is written as a real JSON boolean rather than the string "false".
# Matches are wrapped in a pink <font> tag.
GET /hotel/_search
{
  "query": {
    "match": {
      "all": "如家"
    }
  },
  "highlight": {
    "fields": {
      "name": {
        "require_field_match": false
      }
    },
    "pre_tags": "<font color='pink'>",
    "post_tags": "</font>"
  }
}

聚合查询

Bucket (桶聚合)

聚合查询: 桶聚合, 查询全国每个城市的酒店数量

# Bucket aggregation: group hotels by city, order buckets by document count ascending,
# return 7 buckets. "size": 0 suppresses the search hits so only the aggregation is returned.
GET /hotel/_search
{
  "size": 0,
  "aggs": {
    "cityAgg": {
      "terms": {
        "field": "city",
        "order": {
          "_count": "asc"
        }, 
        "size": 7
      }
    }
  }
}

查询上海的酒店，按照酒店品牌统计每一个品牌的酒店数量。比如：如家酒店

# Restrict to hotels whose city is 上海, then group them by brand
# (up to 100 buckets, ordered by document count ascending). No search hits are returned.
GET /hotel/_search
{
  "query": {
    "term": {
      "city": {
        "value": "上海"
      }
    }
  },
  "size": 0,
  "aggs": {
    "brandAggs": {
      "terms": {
        "field": "brand",
        "size": 100,
        "order": {
          "_count": "asc"
        }
      }
    }
  }
}

度量聚合

统计每个品牌酒店的用户评分的min、max、avg等值

# Requirement: average user score over the hotels

GET hotel/_search
{
  "size": 0,
  "aggs": {
    "avgAggs": {
       "avg": {
         "field": "score"
       }
    }
  }
}

酒店的最高分

# Metric aggregation: highest value of the score field
GET hotel/_search
{
  "size": 0,
  "aggs": {
    "maxAggs": {
       "max": {
         "field": "score"
       }
    }
  }
}

酒店的最低分

# Metric aggregation: lowest value of the score field
GET hotel/_search
{
  "size": 0,
  "aggs": {
    "minAggs": {
       "min": {
         "field": "score"
       }
    }
  }
}

stats 求出最大值、最小值、平均值…

# Metric aggregation: stats on the score field (max, min, avg and related values in one response)
GET hotel/_search
{
  "size": 0,
  "aggs": {
    "statsAggs": {
       "stats": {
         "field": "score"
       }
    }
  }
}

在聚合的之前加入限制条件 query

统计北京的酒店的品牌多少家？(限制条件用在桶聚合)

# The query limits the aggregated documents to hotels whose city is 北京;
# the terms aggregation then groups those hotels by brand (up to 30 buckets).
GET hotel/_search
{
  "query": {
    "term": {
      "city": {
        "value": "北京"
      }
    }
  },
  "size": 0, 
  "aggs": {
    "brandAgg": {
      "terms": {
        "field": "brand",
        "size": 30
      }
    }
  }
}

统计上海的酒店的用户评分的平均分？(限制条件用在度量聚合)

# The query limits the aggregated documents to hotels whose city is 上海;
# the avg aggregation then computes the average of their score field.
GET hotel/_search
{
   "query": {
    "term": {
      "city": {
        "value": "上海"
      }
    }
  },
  "size": 0,
  "aggs": {
    "scoreAvg": {
      "avg": {
        "field": "score"
      }
    }
  }
}

管道聚合（pipeline聚合）一个聚合的结果作为另一个聚合的条件

需求：统计上海的酒店的每个品牌的平均分？

按子聚合 avgAggs 的平均分倒序排序（若不指定 order，默认根据第一个聚合的总数倒序显示）

# For hotels whose city is 上海: group by brand (up to 100 buckets), compute the average
# score per brand in the nested avgAggs aggregation, and order the brand buckets by that
# average, highest first.
GET hotel/_search
{
  "query": {
    "term": {
      "city": {
        "value": "上海"
      }
    }
  },
  "size": 0,
  "aggs": {
    "brandAggs": {
      "terms": {
        "field": "brand",
        "size": 100,
        "order": {
          "avgAggs": "desc"
        }
      },
      "aggs": {
        "avgAggs": {
          "avg": {
            "field": "score"
          }
        }
      }
    }
  }
}

自动补全查询

# Auto-completion request against the "suggestion" completion field
GET hotel/_search
{
  "suggest": {
    "hotelSuggestion": {
      "text": "北",	// text to complete
      "completion":{
        "field":"suggestion",	// completion field; corresponds to the auto-completion property defined in the Java bean
        "skip_duplicates":true,
        "size":10
      }
    }
  }
}

自定义分词器

// analyzer: by default used both when building the index and when searching it; once a search_analyzer is also set, analyzer is only used when building the index
// search_analyzer: only used when searching the index
PUT /test
{
  "settings": {
    "analysis": {
      "analyzer": { 
        "my_analyzer": { 
          "tokenizer": "ik_smart",
          "filter": "py"
        }
      },
      "filter": {
        "py": { 
          "type": "pinyin",
          "keep_full_pinyin": false,
          "keep_joined_full_pinyin": true,
          "keep_original": true,
          "limit_first_letter_length": 16,
          "remove_duplicated_term": true,
          "none_chinese_pinyin_tokenize": false
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name":{
        "type": "text",
        "analyzer": "my_analyzer",
        "search_analyzer": "ik_max_word"
      }
    }
  }
}

完整示例

# Create an index whose custom analyzer combines the ik_max_word tokenizer with a pinyin token filter.
# Text fields are indexed with that analyzer and searched with plain ik_max_word.
PUT /maomao_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "maomao_pinyin_analyzer": {
          "tokenizer": "ik_max_word",
          "filter": "pinyin"
        }
      },
      "filter": {
        "pinyin": {
          "type": "pinyin", # token filter type: pinyin
          "keep_first_letter": true, # keep the first letters of the pinyin
          "keep_separate_first_letter": false, # do not keep each first letter as a separate term
          "keep_full_pinyin": true, # keep the full pinyin of every character
          "limit_first_letter_length": 16, # maximum length of the first_letter result
          "keep_joined_full_pinyin": true, # keep the joined full pinyin
          "keep_none_chinese": true, # keep non-Chinese letters and digits in the result
          "keep_none_chinese_together": true, # keep non-Chinese letters together
          "keep_none_chinese_in_first_letter": true, # keep non-Chinese letters in the first-letter term
          "keep_none_chinese_in_joined_full_pinyin": true, # keep non-Chinese letters in the joined full pinyin
          "none_chinese_pinyin_tokenize": true, # split non-Chinese letters into separate pinyin terms when they are pinyin
          "keep_original": true, # also keep the original input
          "lowercase": true, # lowercase non-Chinese letters
          "trim_whitespace": true,
          "remove_duplicated_term": false, # when true, duplicated terms are removed to save index space (disabled here)
          "ignore_pinyin_offset": true # allow overlapping tokens by ignoring offsets
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": {
        "type": "keyword",
        "index": true
      },
      "name": {
        "type": "text",
        "analyzer": "maomao_pinyin_analyzer",
        "search_analyzer": "ik_max_word",
        "copy_to": "maomaoAll",
        "index": true
      },
      "address": {
        "type": "text",
        "analyzer": "maomao_pinyin_analyzer",
        "search_analyzer": "ik_max_word",
        "copy_to": "maomaoAll",
        "index": true
      },
      "price": {
        "type": "integer",
        "index": true
      },
      "score": {
        "type": "integer",
        "index": true
      },
      "brand": {
        "type": "keyword",
        "copy_to": "maomaoAll",
        "index": true
      },
      "city": {
        "type": "keyword",
        "index": true
      },
      "starName": {
        "type": "keyword",
        "index": true
      },
      "business": {
        "type": "text",
        "analyzer": "maomao_pinyin_analyzer",
        "search_analyzer": "ik_max_word",
        "copy_to": "maomaoAll",
        "index": true
      },
      "pic": {
        "type": "keyword",
        "index": false
      },
      "isAD": {
        "type": "keyword",
        "index": true
      },
      "location": {
        "type": "geo_point",
        "index": true
      },
      "maomaoAll": {
        "type": "text",
        "analyzer": "maomao_pinyin_analyzer",
        "search_analyzer": "ik_max_word",
        "index": true
      }
    }
  }
}

自定义分词器官方地址(GitHub)

可选参数
– keep_first_letter启用此选项时，例如：刘德华 > ldh，默认值：true
– keep_separate_first_letter启用此选项时，将分别保留首字母，例如：刘德华 > l,d,h，默认值：false
– limit_first_letter_length设置first_letter结果的最大长度，默认值：16
– keep_full_pinyin当启用此选项时，例如：刘德华 > [liu,de,hua]，默认值：true
– keep_joined_full_pinyin当启用此选项时，例如：刘德华 > [liudehua]，默认值：false
– keep_none_chinese在结果中保留非中文字母或数字，默认值：true
– keep_none_chinese_together将非中文字母放在一起，默认值：true
– keep_none_chinese_in_first_letter在第一个字母中保留非中文字母
– keep_none_chinese_in_joined_full_pinyin将非中文字母保留在连接的完整拼音中
– none_chinese_pinyin_tokenize如果非中文字母是拼音，则将其拆分为单独的拼音术语，默认值：true
– keep_original启用此选项时，也会保留原始输入，默认值：false, 使用此属性时, 应该设置为true, 要保留原始值
– lowercase小写非中文字母，默认值：true
– trim_whitespace默认值：true
– remove_duplicated_term启用此选项时，将删除重复的术语以保存索引，默认值：false
– ignore_pinyin_offset6.0之后，偏移量受到严格约束，不允许重叠标记，使用此参数，重叠标记将允许忽略偏移量，请注意，所有与位置相关的查询或突出显示都会变得不正确，您应该使用多个字段并指定不同的设置，用于不同的查询目的。如果您需要偏移量，请将其设置为 false。默认值：true。

整合示例

DSL

# Integrated hotel index: a custom analyzer (ik_max_word tokenizer + pinyin filter "py") is used
# at index time, plain ik_max_word at search time.
# name, brand and business are copied into "all"; "suggestion" is the completion field used by
# the auto-completion request in these notes.
# isAD is mapped explicitly as boolean, matching the Boolean isAD property of HotelDoc.
PUT /hotel
{
  "settings": {
    "analysis": {
      "analyzer": {
        "text_anlyzer": {
          "tokenizer": "ik_max_word",
          "filter": "py"
        }
      },
      "filter": {
        "py": {
          "type": "pinyin",
          "keep_full_pinyin": false,
          "keep_joined_full_pinyin": true,
          "keep_original": true,
          "limit_first_letter_length": 16,
          "remove_duplicated_term": true,
          "none_chinese_pinyin_tokenize": false
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id":{
        "type": "keyword"
      },
      "name":{
        "type": "text",
        "analyzer": "text_anlyzer",
        "search_analyzer": "ik_max_word",
        "copy_to": "all"
      },
      "address":{
        "type": "keyword",
        "index": false
      },
      "price":{
        "type": "integer"
      },
      "score":{
        "type": "integer"
      },
      "brand":{
        "type": "keyword",
        "copy_to": "all"
      },
      "city":{
        "type": "keyword"
      },
      "starName":{
        "type": "keyword"
      },
      "business":{
        "type": "keyword",
        "copy_to": "all"
      },
      "location":{
        "type": "geo_point"
      },
      "pic":{
        "type": "keyword",
        "index": false
      },
      "isAD":{
        "type": "boolean"
      },
      "all":{
        "type": "text",
        "analyzer": "text_anlyzer",
        "search_analyzer": "ik_max_word"
      },
      "suggestion":{
        "type": "completion",
        "analyzer": "text_anlyzer",
        "search_analyzer": "ik_max_word"
      }
    }
  }
}

Java Bean

/**
 * Search-index document for a hotel, built from a {@link Hotel} entity.
 *
 * <p>Besides the plain copied properties it carries a {@code location} string
 * ("latitude, longitude") and the {@code suggestion} list used for auto-completion.
 */
@Data
@NoArgsConstructor
public class HotelDoc {

    private Long id;

    private String name;

    private String address;

    private Integer price;

    private Integer score;

    private String brand;

    private String city;

    private String starName;

    private String business;

    private String pic;

    // Not assigned by the constructor below.
    private String all;

    // Latitude and longitude joined as "latitude, longitude".
    private String location;

    private Boolean isAD;

    // Distance value; not assigned by the constructor below.
    private Object distance;

    // Inputs for the auto-completion field.
    private List<String> suggestion;

    /**
     * Copies the properties of the given hotel and derives {@code location} and
     * {@code suggestion} from it.
     *
     * @param hotel source entity; must not be null
     */
    public HotelDoc(Hotel hotel) {
        this.id = hotel.getId();
        this.name = hotel.getName();
        this.address = hotel.getAddress();
        this.price = hotel.getPrice();
        this.score = hotel.getScore();
        this.brand = hotel.getBrand();
        this.city = hotel.getCity();
        this.starName = hotel.getStarName();
        this.business = hotel.getBusiness();
        this.pic = hotel.getPic();
        this.isAD = hotel.getIsAD();
        this.location = hotel.getLatitude() + ", " + hotel.getLongitude();

        // Fill the suggestion list from brand, city and business. Null or empty values
        // are skipped so the list only ever holds usable completion inputs.
        this.suggestion = new ArrayList<>();
        for (String candidate : new String[]{hotel.getBrand(), hotel.getCity(), hotel.getBusiness()}) {
            if (candidate != null && !candidate.isEmpty()) {
                this.suggestion.add(candidate);
            }
        }
    }
}

测试分词器

# Run the given text through the ik_max_word analyzer to inspect the tokens it produces
GET _analyze
{
  "text": "我有我的快乐和自由",
  "analyzer": "ik_max_word"
}

Kibana 运维 ElasticSearch

基础使用

Insert

创建索引库, 并设定 分片数量 和 副本数量

给指定的索引库创建映射

Delete

删除指定的索引库(普通删除)

Search

查看指定索引库的映射树

Close / Open 索引库

文档相关操作

Insert

添加文档, 指定文档id

Search

查询指定索引库指定文档id数据

查询指定索引库所有数据(普通查询)

条件查询

Update

修改指定文档id的字段值

Delete

Create Mapping

创建指定的索引, 并创建映射树

条件检索

全文检索

查询全部

单字段查询

多字段查询

精准查询 (不会对查询条件分词)

Term (精确值查询)

Range (范围查询)

地理位置查询

圆形

矩形

组合条件查询

算分查询

排序查询

高亮显示

聚合查询

Bucket (桶聚合)

度量聚合

在聚合的之前加入限制条件 query

管道聚合（pipeline聚合）一个聚合的结果作为另一个聚合的条件

自动补全查询

自定义分词器

完整示例

整合示例

创建索引库, 并设定分片数量和副本数量