diff --git a/elastic search/elastic search.md b/elastic search/elastic search.md index d2e1a9b..8000628 100644 --- a/elastic search/elastic search.md +++ b/elastic search/elastic search.md @@ -899,4 +899,378 @@ GET /cooking_blog/_search > ##### `must_not` > `must_not`会淘汰不满足指定条件的文档 +## 使用Query DSL分析数据 +在使用kibana导入`sample ecommerce orders`的数据集后,其会创建一个名为`kibana_sample_data_ecommerce`的索引,其索引结构如下: +``` +{ + "kibana_sample_data_ecommerce": { + "mappings": { + "properties": { + "category": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "currency": { + "type": "keyword" + }, + "customer_birth_date": { + "type": "date" + }, + "customer_first_name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "customer_full_name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "customer_gender": { + "type": "keyword" + }, + "customer_id": { + "type": "keyword" + }, + "customer_last_name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "customer_phone": { + "type": "keyword" + }, + "day_of_week": { + "type": "keyword" + }, + "day_of_week_i": { + "type": "integer" + }, + "email": { + "type": "keyword" + }, + "event": { + "properties": { + "dataset": { + "type": "keyword" + } + } + }, + "geoip": { + "properties": { + "city_name": { + "type": "keyword" + }, + "continent_name": { + "type": "keyword" + }, + "country_iso_code": { + "type": "keyword" + }, + "location": { + "type": "geo_point" + }, + "region_name": { + "type": "keyword" + } + } + }, + "manufacturer": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "order_date": { + "type": "date" + }, + "order_id": { + "type": "keyword" + }, + "products": { + "properties": { + "_id": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + 
"base_price": { + "type": "half_float" + }, + "base_unit_price": { + "type": "half_float" + }, + "category": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "created_on": { + "type": "date" + }, + "discount_amount": { + "type": "half_float" + }, + "discount_percentage": { + "type": "half_float" + }, + "manufacturer": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "min_price": { + "type": "half_float" + }, + "price": { + "type": "half_float" + }, + "product_id": { + "type": "long" + }, + "product_name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + }, + "analyzer": "english" + }, + "quantity": { + "type": "integer" + }, + "sku": { + "type": "keyword" + }, + "tax_amount": { + "type": "half_float" + }, + "taxful_price": { + "type": "half_float" + }, + "taxless_price": { + "type": "half_float" + }, + "unit_discount_amount": { + "type": "half_float" + } + } + }, + "sku": { + "type": "keyword" + }, + "taxful_total_price": { + "type": "half_float" + }, + "taxless_total_price": { + "type": "half_float" + }, + "total_quantity": { + "type": "integer" + }, + "total_unique_products": { + "type": "integer" + }, + "type": { + "type": "keyword" + }, + "user": { + "type": "keyword" + } + } + } + } +} +``` +其中,`geoip`和`products`都为包含子字段的object类型(其子字段定义在各自的`properties`下),而`geo_point`类型则用于表示地理坐标。 +### get metrics +#### 计算订单的平均值 +通过如下请求,可以计算数据集中所有订单的平均值: +``` +GET kibana_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "avg_order_value": { + "avg": { + "field": "taxful_total_price" + } + } + } +} +``` + +在上述请求体中,各属性分别代表如下含义: +- size: 将`size`设置为0可以避免在返回的结果中包含`匹配的文档`,size设置为0后,返回结果中只会包含聚合的结果 +- `avg_order_value`为该项metric的name +- `avg`为聚合类型,会计算算术平均值 + +该请求的返回结果如下所示: +``` +{ + "took": 0, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4675, + "relation": "eq" + }, + "max_score": null, + "hits": 
[] + }, + "aggregations": { + "avg_order_value": { + "value": 75.05542864304813 + } + } +} +``` +返回结果中各属性含义如下所示: +- `hits.total.value`:代表数据集中的订单数量 +- `hits.hits`为空,因为请求体中设置了`size`为0 +- `aggregations`中包含聚合结果,请求体中为metric指定的名称为`avg_order_value`,故而该项metric位于`aggregations.avg_order_value` + +#### 在单个请求中计算订单的多个metrics +如果想要在单个请求中计算多个metrics,可以使用`stats`聚合类型: +``` +{ + "size":0, + "aggs":{ + "order_stats":{ + "stats":{ + "field":"taxful_total_price" + } + } + } +} +``` +其中,`stats`聚合类型会返回count, min, max, avg, sum五个metrics。 + +其返回结果为 +``` +{ + "aggregations": { + "order_stats": { + "count": 4675, + "min": 6.98828125, + "max": 2250, + "avg": 75.05542864304813, + "sum": 350884.12890625 + } + } +} +``` + +#### 根据category对订单进行分组 +可以使用`terms`聚合类型来对订单进行分组: +``` +GET kibana_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "sales_by_category": { + "terms": { + "field": "category.keyword", + "size": 5, + "order": { "_count": "desc" } + } + } + } +} +``` +`terms`聚合类型会根据该字段的值对文档进行分组,字段值相同的文档会被分到同一个bucket中 + +`"size":5`和`"order": { "_count": "desc" }`使得返回结果中只会包含文档数量最多的5个category + + +其返回结果如下所示: +``` +{ + "took": 4, + "timed_out": false, + "_shards": { + "total": 5, + "successful": 5, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4675, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "sales_by_category": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 572, + "buckets": [ + { + "key": "Men's Clothing", + "doc_count": 2024 + }, + { + "key": "Women's Clothing", + "doc_count": 1903 + }, + { + "key": "Women's Shoes", + "doc_count": 1136 + }, + { + "key": "Men's Shoes", + "doc_count": 944 + }, + { + "key": "Women's Accessories", + "doc_count": 830 + } + ] + } + } +} +``` + +> #### doc_count_error_upper_bound +> 基于es的分布式结构,`terms aggregations`在多个shards上运行时,document计数可能会有小的误差,`doc_count_error_upper_bound`代表计数的最大可能误差 + +- `sum_other_doc_count`: 由于当前请求体中设置了`aggs.sales_by_category.terms.size`为5,故而`sum_other_doc_count`代表未包含在返回的buckets中的其余文档数量 +- \ No 
newline at end of file