From 02f40d7efd4265e85c128d0a85a65c668c016b97 Mon Sep 17 00:00:00 2001 From: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Date: Wed, 12 Jul 2023 11:39:20 -0400 Subject: [PATCH] Refactor metric aggregation section (#4543) * Refactor metric aggregation section Signed-off-by: Fanit Kolchina * Update _query-dsl/aggregations/metric/matrix-stats.md Co-authored-by: Naarcha-AWS <97990722+Naarcha-AWS@users.noreply.github.com> Signed-off-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> * Update _query-dsl/aggregations/metric/extended-stats.md Co-authored-by: Naarcha-AWS <97990722+Naarcha-AWS@users.noreply.github.com> Signed-off-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> --------- Signed-off-by: Fanit Kolchina Signed-off-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Co-authored-by: Naarcha-AWS <97990722+Naarcha-AWS@users.noreply.github.com> --- .../aggregations/bucket/adjacency-matrix.md | 1 + .../aggregations/bucket/date-histogram.md | 1 + _query-dsl/aggregations/bucket/date-range.md | 1 + .../bucket/diversified-sampler.md | 1 + _query-dsl/aggregations/bucket/filter.md | 1 + _query-dsl/aggregations/bucket/filters.md | 1 + .../aggregations/bucket/geo-distance.md | 1 + .../aggregations/bucket/geohash-grid.md | 1 + _query-dsl/aggregations/bucket/geohex-grid.md | 12 + .../aggregations/bucket/geotile-grid.md | 11 + _query-dsl/aggregations/bucket/global.md | 1 + _query-dsl/aggregations/bucket/histogram.md | 1 + _query-dsl/aggregations/bucket/ip-range.md | 1 + _query-dsl/aggregations/bucket/missing.md | 1 + _query-dsl/aggregations/bucket/multi-terms.md | 1 + _query-dsl/aggregations/bucket/nested.md | 3 + _query-dsl/aggregations/bucket/range.md | 1 + .../aggregations/bucket/reverse-nested.md | 1 + _query-dsl/aggregations/bucket/sampler.md | 1 + .../aggregations/bucket/significant-terms.md | 1 + .../aggregations/bucket/significant-text.md | 1 + _query-dsl/aggregations/bucket/terms.md | 1 + 
_query-dsl/aggregations/metric-agg.md | 638 ------------------ _query-dsl/aggregations/metric/average.md | 56 ++ _query-dsl/aggregations/metric/cardinality.md | 60 ++ .../aggregations/metric/extended-stats.md | 75 ++ _query-dsl/aggregations/metric/geobounds.md | 48 ++ _query-dsl/aggregations/metric/index.md | 28 + .../aggregations/metric/matrix-stats.md | 85 +++ _query-dsl/aggregations/metric/maximum.md | 56 ++ _query-dsl/aggregations/metric/minimum.md | 56 ++ .../aggregations/metric/percentile-ranks.md | 45 ++ _query-dsl/aggregations/metric/percentile.md | 52 ++ .../aggregations/metric/scripted-metric.md | 71 ++ _query-dsl/aggregations/metric/stats.md | 44 ++ _query-dsl/aggregations/metric/sum.md | 56 ++ _query-dsl/aggregations/metric/top-hits.md | 147 ++++ _query-dsl/aggregations/metric/value-count.md | 40 ++ 38 files changed, 964 insertions(+), 638 deletions(-) delete mode 100644 _query-dsl/aggregations/metric-agg.md create mode 100644 _query-dsl/aggregations/metric/average.md create mode 100644 _query-dsl/aggregations/metric/cardinality.md create mode 100644 _query-dsl/aggregations/metric/extended-stats.md create mode 100644 _query-dsl/aggregations/metric/geobounds.md create mode 100644 _query-dsl/aggregations/metric/index.md create mode 100644 _query-dsl/aggregations/metric/matrix-stats.md create mode 100644 _query-dsl/aggregations/metric/maximum.md create mode 100644 _query-dsl/aggregations/metric/minimum.md create mode 100644 _query-dsl/aggregations/metric/percentile-ranks.md create mode 100644 _query-dsl/aggregations/metric/percentile.md create mode 100644 _query-dsl/aggregations/metric/scripted-metric.md create mode 100644 _query-dsl/aggregations/metric/stats.md create mode 100644 _query-dsl/aggregations/metric/sum.md create mode 100644 _query-dsl/aggregations/metric/top-hits.md create mode 100644 _query-dsl/aggregations/metric/value-count.md diff --git a/_query-dsl/aggregations/bucket/adjacency-matrix.md 
b/_query-dsl/aggregations/bucket/adjacency-matrix.md index 7e7a4f15ea..d1c08a95c8 100644 --- a/_query-dsl/aggregations/bucket/adjacency-matrix.md +++ b/_query-dsl/aggregations/bucket/adjacency-matrix.md @@ -43,6 +43,7 @@ GET opensearch_dashboards_sample_data_ecommerce/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/date-histogram.md b/_query-dsl/aggregations/bucket/date-histogram.md index ecc76f392c..cd3eb6585f 100644 --- a/_query-dsl/aggregations/bucket/date-histogram.md +++ b/_query-dsl/aggregations/bucket/date-histogram.md @@ -26,6 +26,7 @@ GET opensearch_dashboards_sample_data_logs/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/date-range.md b/_query-dsl/aggregations/bucket/date-range.md index 2c2ac80405..12ec53aa16 100644 --- a/_query-dsl/aggregations/bucket/date-range.md +++ b/_query-dsl/aggregations/bucket/date-range.md @@ -31,6 +31,7 @@ GET opensearch_dashboards_sample_data_logs/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/diversified-sampler.md b/_query-dsl/aggregations/bucket/diversified-sampler.md index af8e94c690..993173cbaa 100644 --- a/_query-dsl/aggregations/bucket/diversified-sampler.md +++ b/_query-dsl/aggregations/bucket/diversified-sampler.md @@ -31,6 +31,7 @@ GET opensearch_dashboards_sample_data_logs/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/filter.md b/_query-dsl/aggregations/bucket/filter.md index 0af3e99250..6ed9901826 100644 --- a/_query-dsl/aggregations/bucket/filter.md +++ b/_query-dsl/aggregations/bucket/filter.md @@ -36,6 +36,7 @@ GET opensearch_dashboards_sample_data_ecommerce/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/filters.md b/_query-dsl/aggregations/bucket/filters.md index c054817ca0..6315faadbc 
100644 --- a/_query-dsl/aggregations/bucket/filters.md +++ b/_query-dsl/aggregations/bucket/filters.md @@ -45,6 +45,7 @@ GET opensearch_dashboards_sample_data_logs/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/geo-distance.md b/_query-dsl/aggregations/bucket/geo-distance.md index 80c4128c31..4424e839fa 100644 --- a/_query-dsl/aggregations/bucket/geo-distance.md +++ b/_query-dsl/aggregations/bucket/geo-distance.md @@ -113,6 +113,7 @@ GET opensearch_dashboards_sample_data_logs/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/geohash-grid.md b/_query-dsl/aggregations/bucket/geohash-grid.md index 0181ac7e2b..2c382647b5 100644 --- a/_query-dsl/aggregations/bucket/geohash-grid.md +++ b/_query-dsl/aggregations/bucket/geohash-grid.md @@ -28,6 +28,7 @@ GET opensearch_dashboards_sample_data_logs/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/geohex-grid.md b/_query-dsl/aggregations/bucket/geohex-grid.md index 23d05715ee..394f53642b 100644 --- a/_query-dsl/aggregations/bucket/geohex-grid.md +++ b/_query-dsl/aggregations/bucket/geohex-grid.md @@ -37,6 +37,7 @@ PUT national_parks } } ``` +{% include copy-curl.html %} Index the following documents into the sample index: @@ -46,19 +47,26 @@ PUT national_parks/_doc/1 "name": "Yellowstone National Park", "location": "44.42, -110.59" } +``` +{% include copy-curl.html %} +```json PUT national_parks/_doc/2 { "name": "Yosemite National Park", "location": "37.87, -119.53" } +``` +{% include copy-curl.html %} +```json PUT national_parks/_doc/3 { "name": "Death Valley National Park", "location": "36.53, -116.93" } ``` +{% include copy-curl.html %} You can index geopoints in several formats. For a list of all supported formats, see the [geopoint documentation]({{site.url}}{{site.baseurl}}/opensearch/supported-field-types/geo-point#formats). 
{: .note} @@ -80,6 +88,7 @@ GET national_parks/_search } } ``` +{% include copy-curl.html %} You can use either the `GET` or `POST` HTTP method for geohex grid aggregation queries. {: .note} @@ -166,6 +175,7 @@ GET national_parks/_search } } ``` +{% include copy-curl.html %} All three documents are bucketed separately because of higher granularity: @@ -266,6 +276,7 @@ GET national_parks/_search } } ``` +{% include copy-curl.html %} The response contains the two documents that are within the `geo_bounding_box` bounds: @@ -327,6 +338,7 @@ GET national_parks/_search } } ``` +{% include copy-curl.html %} The response contains only the two results that are within the specified bounds: diff --git a/_query-dsl/aggregations/bucket/geotile-grid.md b/_query-dsl/aggregations/bucket/geotile-grid.md index 5697772c0f..e254041554 100644 --- a/_query-dsl/aggregations/bucket/geotile-grid.md +++ b/_query-dsl/aggregations/bucket/geotile-grid.md @@ -30,6 +30,7 @@ PUT national_parks } } ``` +{% include copy-curl.html %} Index the following documents into the sample index: @@ -39,19 +40,26 @@ PUT national_parks/_doc/1 "name": "Yellowstone National Park", "location": "44.42, -110.59" } +``` +{% include copy-curl.html %} +```json PUT national_parks/_doc/2 { "name": "Yosemite National Park", "location": "37.87, -119.53" } +``` +{% include copy-curl.html %} +```json PUT national_parks/_doc/3 { "name": "Death Valley National Park", "location": "36.53, -116.93" } ``` +{% include copy-curl.html %} You can index geopoints in several formats. For a list of all supported formats, see the [geopoint documentation]({{site.url}}{{site.baseurl}}/opensearch/supported-field-types/geo-point#formats). {: .note} @@ -73,6 +81,7 @@ GET national_parks/_search } } ``` +{% include copy-curl.html %} You can use either the `GET` or `POST` HTTP method for geotile grid aggregation queries. 
{: .note} @@ -155,6 +164,7 @@ GET national_parks/_search } } ``` +{% include copy-curl.html %} All three documents are bucketed separately because of higher granularity: @@ -245,6 +255,7 @@ GET national_parks/_search } } ``` +{% include copy-curl.html %} The response contains only the two results that are within the specified bounds: diff --git a/_query-dsl/aggregations/bucket/global.md b/_query-dsl/aggregations/bucket/global.md index 07508e4617..d72150af48 100644 --- a/_query-dsl/aggregations/bucket/global.md +++ b/_query-dsl/aggregations/bucket/global.md @@ -37,6 +37,7 @@ GET opensearch_dashboards_sample_data_ecommerce/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/histogram.md b/_query-dsl/aggregations/bucket/histogram.md index 11ea2fa75b..9f7a540f2b 100644 --- a/_query-dsl/aggregations/bucket/histogram.md +++ b/_query-dsl/aggregations/bucket/histogram.md @@ -28,6 +28,7 @@ GET opensearch_dashboards_sample_data_logs/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/ip-range.md b/_query-dsl/aggregations/bucket/ip-range.md index 35b2cb8d43..7adeaf0b7a 100644 --- a/_query-dsl/aggregations/bucket/ip-range.md +++ b/_query-dsl/aggregations/bucket/ip-range.md @@ -33,6 +33,7 @@ GET opensearch_dashboards_sample_data_logs/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/missing.md b/_query-dsl/aggregations/bucket/missing.md index 03802930be..ed7151639e 100644 --- a/_query-dsl/aggregations/bucket/missing.md +++ b/_query-dsl/aggregations/bucket/missing.md @@ -27,6 +27,7 @@ GET opensearch_dashboards_sample_data_logs/_search } } ``` +{% include copy-curl.html %} Because the default value for the `min_doc_count` parameter is 1, the `missing` parameter doesn't return any buckets in its response. 
Set `min_doc_count` parameter to 0 to see the "N/A" bucket in the response: diff --git a/_query-dsl/aggregations/bucket/multi-terms.md b/_query-dsl/aggregations/bucket/multi-terms.md index d02c2b4cb1..ff3b822ef9 100644 --- a/_query-dsl/aggregations/bucket/multi-terms.md +++ b/_query-dsl/aggregations/bucket/multi-terms.md @@ -45,6 +45,7 @@ GET sample-index100/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/nested.md b/_query-dsl/aggregations/bucket/nested.md index abf5767dcd..3df9bb9f77 100644 --- a/_query-dsl/aggregations/bucket/nested.md +++ b/_query-dsl/aggregations/bucket/nested.md @@ -28,6 +28,7 @@ PUT logs/_doc/0 ] } ``` +{% include copy-curl.html %} OpenSearch merges all sub-properties of the entity relations that looks something like this: @@ -60,6 +61,7 @@ PUT logs } } ``` +{% include copy-curl.html %} Nested documents allow you to index the same JSON document but will keep your pages in separate Lucene documents, making only searches like `pages=landing` and `load_time=200` return the expected result. Internally, nested objects index each object in the array as a separate hidden document, meaning that each nested object can be queried independently of the others. 
@@ -84,6 +86,7 @@ GET logs/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/range.md b/_query-dsl/aggregations/bucket/range.md index db8c11534c..ec82c8c9a1 100644 --- a/_query-dsl/aggregations/bucket/range.md +++ b/_query-dsl/aggregations/bucket/range.md @@ -40,6 +40,7 @@ GET opensearch_dashboards_sample_data_logs/_search } } ``` +{% include copy-curl.html %} The response includes the `from` key values and excludes the `to` key values: diff --git a/_query-dsl/aggregations/bucket/reverse-nested.md b/_query-dsl/aggregations/bucket/reverse-nested.md index 2332539feb..1be7e91535 100644 --- a/_query-dsl/aggregations/bucket/reverse-nested.md +++ b/_query-dsl/aggregations/bucket/reverse-nested.md @@ -47,6 +47,7 @@ GET logs/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/sampler.md b/_query-dsl/aggregations/bucket/sampler.md index b50e9ab872..3668f3c755 100644 --- a/_query-dsl/aggregations/bucket/sampler.md +++ b/_query-dsl/aggregations/bucket/sampler.md @@ -49,6 +49,7 @@ GET opensearch_dashboards_sample_data_logs/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/significant-terms.md b/_query-dsl/aggregations/bucket/significant-terms.md index 2426aa4ce3..017e3b7dd8 100644 --- a/_query-dsl/aggregations/bucket/significant-terms.md +++ b/_query-dsl/aggregations/bucket/significant-terms.md @@ -35,6 +35,7 @@ GET opensearch_dashboards_sample_data_logs/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/bucket/significant-text.md b/_query-dsl/aggregations/bucket/significant-text.md index f1a832c463..1c136603d6 100644 --- a/_query-dsl/aggregations/bucket/significant-text.md +++ b/_query-dsl/aggregations/bucket/significant-text.md @@ -48,6 +48,7 @@ GET shakespeare/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git 
a/_query-dsl/aggregations/bucket/terms.md b/_query-dsl/aggregations/bucket/terms.md index 162f80e623..229ded6133 100644 --- a/_query-dsl/aggregations/bucket/terms.md +++ b/_query-dsl/aggregations/bucket/terms.md @@ -26,6 +26,7 @@ GET opensearch_dashboards_sample_data_logs/_search } } ``` +{% include copy-curl.html %} #### Example response diff --git a/_query-dsl/aggregations/metric-agg.md b/_query-dsl/aggregations/metric-agg.md deleted file mode 100644 index 89f35b7253..0000000000 --- a/_query-dsl/aggregations/metric-agg.md +++ /dev/null @@ -1,638 +0,0 @@ ---- -layout: default -title: Metric aggregations -parent: Aggregations -nav_order: 2 -permalink: /aggregations/metric-agg/ -redirect_from: - - /opensearch/metric-agg/ ---- - -# Metric aggregations - -Metric aggregations let you perform simple calculations such as finding the minimum, maximum, and average values of a field. - -## Types of metric aggregations - -Metric aggregations are of two types: single-value metric aggregations and multi-value metric aggregations. - -### Single-value metric aggregations - -Single-value metric aggregations return a single metric. For example, `sum`, `min`, `max`, `avg`, `cardinality`, and `value_count`. - -### Multi-value metric aggregations - -Multi-value metric aggregations return more than one metric. For example, `stats`, `extended_stats`, `matrix_stats`, `percentile`, `percentile_ranks`, `geo_bound`, `top_hits`, and `scripted_metric`. - -## sum, min, max, avg - -The `sum`, `min`, `max`, and `avg` metrics are single-value metric aggregations that return the sum, minimum, maximum, and average values of a field, respectively. - -The following example calculates the total sum of the `taxful_total_price` field: - -```json -GET opensearch_dashboards_sample_data_ecommerce/_search -{ - "size": 0, - "aggs": { - "sum_taxful_total_price": { - "sum": { - "field": "taxful_total_price" - } - } - } -} -``` - -#### Example response - -```json -... 
- "aggregations" : { - "sum_taxful_total_price" : { - "value" : 350884.12890625 - } - } -} -``` - -In a similar fashion, you can find the minimum, maximum, and average values of a field. - -## cardinality - -The `cardinality` metric is a single-value metric aggregation that counts the number of unique or distinct values of a field. - -The following example finds the number of unique products in an eCommerce store: - -```json -GET opensearch_dashboards_sample_data_ecommerce/_search -{ - "size": 0, - "aggs": { - "unique_products": { - "cardinality": { - "field": "products.product_id" - } - } - } -} -``` - -#### Example response - -```json -... - "aggregations" : { - "unique_products" : { - "value" : 7033 - } - } -} -``` - -Cardinality count is approximate. -If you have tens of thousands of products in your hypothetical store, an accurate cardinality calculation requires loading all the values into a hash set and returning its size. This approach doesn't scale well; it requires huge amounts of memory and can cause high latencies. - -You can control the trade-off between memory and accuracy with the `precision_threshold` setting. This setting defines the threshold below which counts are expected to be close to accurate. Above this value, counts might become a bit less accurate. The default value of `precision_threshold` is 3,000. The maximum supported value is 40,000. - -```json -GET opensearch_dashboards_sample_data_ecommerce/_search -{ - "size": 0, - "aggs": { - "unique_products": { - "cardinality": { - "field": "products.product_id", - "precision_threshold": 10000 - } - } - } -} -``` - -## value_count - -The `value_count` metric is a single-value metric aggregation that calculates the number of values that an aggregation is based on. - -For example, you can use the `value_count` metric with the `avg` metric to find how many numbers the aggregation uses to calculate an average value. 
- -```json -GET opensearch_dashboards_sample_data_ecommerce/_search -{ - "size": 0, - "aggs": { - "number_of_values": { - "value_count": { - "field": "taxful_total_price" - } - } - } -} -``` - -#### Example response - -```json -... - "aggregations" : { - "number_of_values" : { - "value" : 4675 - } - } -} -``` - -## stats, extended_stats, matrix_stats - -The `stats` metric is a multi-value metric aggregation that returns all basic metrics such as `min`, `max`, `sum`, `avg`, and `value_count` in one aggregation query. - -The following example returns the basic stats for the `taxful_total_price` field: - -```json -GET opensearch_dashboards_sample_data_ecommerce/_search -{ - "size": 0, - "aggs": { - "stats_taxful_total_price": { - "stats": { - "field": "taxful_total_price" - } - } - } -} -``` - -#### Example response - -```json -... -"aggregations" : { - "stats_taxful_total_price" : { - "count" : 4675, - "min" : 6.98828125, - "max" : 2250.0, - "avg" : 75.05542864304813, - "sum" : 350884.12890625 - } - } -} -``` - -The `extended_stats` aggregation is an extended version of the `stats` aggregation. Apart from including basic stats, `extended_stats` also returns stats such as `sum_of_squares`, `variance`, and `std_deviation`. - -```json -GET opensearch_dashboards_sample_data_ecommerce/_search -{ - "size": 0, - "aggs": { - "extended_stats_taxful_total_price": { - "extended_stats": { - "field": "taxful_total_price" - } - } - } -} -``` - -#### Example response - -```json -... 
-"aggregations" : { - "extended_stats_taxful_total_price" : { - "count" : 4675, - "min" : 6.98828125, - "max" : 2250.0, - "avg" : 75.05542864304813, - "sum" : 350884.12890625, - "sum_of_squares" : 3.9367749294174194E7, - "variance" : 2787.59157113862, - "variance_population" : 2787.59157113862, - "variance_sampling" : 2788.187974983536, - "std_deviation" : 52.79764740155209, - "std_deviation_population" : 52.79764740155209, - "std_deviation_sampling" : 52.80329511482722, - "std_deviation_bounds" : { - "upper" : 180.6507234461523, - "lower" : -30.53986616005605, - "upper_population" : 180.6507234461523, - "lower_population" : -30.53986616005605, - "upper_sampling" : 180.66201887270256, - "lower_sampling" : -30.551161586606312 - } - } - } -} -``` - -The `std_deviation_bounds` object provides a visual variance of the data with an interval of plus/minus two standard deviations from the mean. -To set the standard deviation to a different value, say 3, set `sigma` to 3: - -```json -GET opensearch_dashboards_sample_data_ecommerce/_search -{ - "size": 0, - "aggs": { - "extended_stats_taxful_total_price": { - "extended_stats": { - "field": "taxful_total_price", - "sigma": 3 - } - } - } -} -``` - -The `matrix_stats` aggregation generates advanced stats for multiple fields in a matrix form. -The following example returns advanced stats in a matrix form for the `taxful_total_price` and `products.base_price` fields: - -```json -GET opensearch_dashboards_sample_data_ecommerce/_search -{ - "size": 0, - "aggs": { - "matrix_stats_taxful_total_price": { - "matrix_stats": { - "fields": ["taxful_total_price", "products.base_price"] - } - } - } -} -``` - -#### Example response - -```json -... 
-"aggregations" : { - "matrix_stats_taxful_total_price" : { - "doc_count" : 4675, - "fields" : [ - { - "name" : "products.base_price", - "count" : 4675, - "mean" : 34.994239430147196, - "variance" : 360.5035285833703, - "skewness" : 5.530161335032702, - "kurtosis" : 131.16306324042148, - "covariance" : { - "products.base_price" : 360.5035285833703, - "taxful_total_price" : 846.6489362233166 - }, - "correlation" : { - "products.base_price" : 1.0, - "taxful_total_price" : 0.8444765264325268 - } - }, - { - "name" : "taxful_total_price", - "count" : 4675, - "mean" : 75.05542864304839, - "variance" : 2788.1879749835402, - "skewness" : 15.812149139924037, - "kurtosis" : 619.1235507385902, - "covariance" : { - "products.base_price" : 846.6489362233166, - "taxful_total_price" : 2788.1879749835402 - }, - "correlation" : { - "products.base_price" : 0.8444765264325268, - "taxful_total_price" : 1.0 - } - } - ] - } - } -} -``` - -Statistic | Description -:--- | :--- -`count` | The number of samples measured. -`mean` | The average value of the field measured from the sample. -`variance` | How far the values of the field measured are spread out from its mean value. The larger the variance, the more it's spread from its mean value. -`skewness` | An asymmetric measure of the distribution of the field's values around the mean. -`kurtosis` | A measure of the tail heaviness of a distribution. As the tail becomes lighter, kurtosis decreases. As the tail becomes heavier, kurtosis increases. To learn about kurtosis, see [Wikipedia](https://en.wikipedia.org/wiki/Kurtosis). -`covariance` | A measure of the joint variability between two fields. A positive value means their values move in the same direction and vice versa. -`correlation` | A measure of the strength of the relationship between two fields. The valid values are between [-1, 1]. A value of -1 means that the value is negatively correlated and a value of 1 means that it's positively correlated. 
A value of 0 means that there's no identifiable relationship between them. - -## percentile, percentile_ranks - -Percentile is the percentage of the data that's at or below a certain threshold value. - -The `percentile` metric is a multi-value metric aggregation that lets you find outliers in your data or figure out the distribution of your data. - -Like the `cardinality` metric, the `percentile` metric is also approximate. - -The following example calculates the percentile in relation to the `taxful_total_price` field: - -```json -GET opensearch_dashboards_sample_data_ecommerce/_search -{ - "size": 0, - "aggs": { - "percentile_taxful_total_price": { - "percentiles": { - "field": "taxful_total_price" - } - } - } -} -``` - -#### Example response - -```json -... -"aggregations" : { - "percentile_taxful_total_price" : { - "values" : { - "1.0" : 21.984375, - "5.0" : 27.984375, - "25.0" : 44.96875, - "50.0" : 64.22061688311689, - "75.0" : 93.0, - "95.0" : 156.0, - "99.0" : 222.0 - } - } - } -} -``` - -Percentile rank is the percentile of values at or below a threshold grouped by a specified value. For example, if a value is greater than or equal to 80% of the values, it has a percentile rank of 80. - -```json -GET opensearch_dashboards_sample_data_ecommerce/_search -{ - "size": 0, - "aggs": { - "percentile_rank_taxful_total_price": { - "percentile_ranks": { - "field": "taxful_total_price", - "values": [ - 10, - 15 - ] - } - } - } -} -``` - -#### Example response - -```json -... -"aggregations" : { - "percentile_rank_taxful_total_price" : { - "values" : { - "10.0" : 0.055096056411283456, - "15.0" : 0.0830092961834656 - } - } - } -} -``` - -## geo_bound - -The `geo_bound` metric is a multi-value metric aggregation that calculates the bounding box in terms of latitude and longitude around a `geo_point` field. 
- -The following example returns the `geo_bound` metrics for the `geoip.location` field: - -```json -GET opensearch_dashboards_sample_data_ecommerce/_search -{ - "size": 0, - "aggs": { - "geo": { - "geo_bounds": { - "field": "geoip.location" - } - } - } -} -``` - -#### Example response - -```json -"aggregations" : { - "geo" : { - "bounds" : { - "top_left" : { - "lat" : 52.49999997206032, - "lon" : -118.20000001229346 - }, - "bottom_right" : { - "lat" : 4.599999985657632, - "lon" : 55.299999956041574 - } - } - } - } -} -``` - -## top_hits - -The `top_hits` metric is a multi-value metric aggregation that ranks the matching documents based on a relevance score for the field that's being aggregated. - -You can specify the following options: - -- `from`: The starting position of the hit. -- `size`: The maximum size of hits to return. The default value is 3. -- `sort`: How the matching hits are sorted. By default, the hits are sorted by the relevance score of the aggregation query. - -The following example returns the top 5 products in your eCommerce data: - -```json -GET opensearch_dashboards_sample_data_ecommerce/_search -{ - "size": 0, - "aggs": { - "top_hits_products": { - "top_hits": { - "size": 5 - } - } - } -} -``` - -#### Example response - -```json -... 
-"aggregations" : { - "top_hits_products" : { - "hits" : { - "total" : { - "value" : 4675, - "relation" : "eq" - }, - "max_score" : 1.0, - "hits" : [ - { - "_index" : "opensearch_dashboards_sample_data_ecommerce", - "_type" : "_doc", - "_id" : "glMlwXcBQVLeQPrkHPtI", - "_score" : 1.0, - "_source" : { - "category" : [ - "Women's Accessories", - "Women's Clothing" - ], - "currency" : "EUR", - "customer_first_name" : "rania", - "customer_full_name" : "rania Evans", - "customer_gender" : "FEMALE", - "customer_id" : 24, - "customer_last_name" : "Evans", - "customer_phone" : "", - "day_of_week" : "Sunday", - "day_of_week_i" : 6, - "email" : "rania@evans-family.zzz", - "manufacturer" : [ - "Tigress Enterprises" - ], - "order_date" : "2021-02-28T14:16:48+00:00", - "order_id" : 583581, - "products" : [ - { - "base_price" : 10.99, - "discount_percentage" : 0, - "quantity" : 1, - "manufacturer" : "Tigress Enterprises", - "tax_amount" : 0, - "product_id" : 19024, - "category" : "Women's Accessories", - "sku" : "ZO0082400824", - "taxless_price" : 10.99, - "unit_discount_amount" : 0, - "min_price" : 5.17, - "_id" : "sold_product_583581_19024", - "discount_amount" : 0, - "created_on" : "2016-12-25T14:16:48+00:00", - "product_name" : "Snood - white/grey/peach", - "price" : 10.99, - "taxful_price" : 10.99, - "base_unit_price" : 10.99 - }, - { - "base_price" : 32.99, - "discount_percentage" : 0, - "quantity" : 1, - "manufacturer" : "Tigress Enterprises", - "tax_amount" : 0, - "product_id" : 19260, - "category" : "Women's Clothing", - "sku" : "ZO0071900719", - "taxless_price" : 32.99, - "unit_discount_amount" : 0, - "min_price" : 17.15, - "_id" : "sold_product_583581_19260", - "discount_amount" : 0, - "created_on" : "2016-12-25T14:16:48+00:00", - "product_name" : "Cardigan - grey", - "price" : 32.99, - "taxful_price" : 32.99, - "base_unit_price" : 32.99 - } - ], - "sku" : [ - "ZO0082400824", - "ZO0071900719" - ], - "taxful_total_price" : 43.98, - "taxless_total_price" : 43.98, - 
"total_quantity" : 2, - "total_unique_products" : 2, - "type" : "order", - "user" : "rani", - "geoip" : { - "country_iso_code" : "EG", - "location" : { - "lon" : 31.3, - "lat" : 30.1 - }, - "region_name" : "Cairo Governorate", - "continent_name" : "Africa", - "city_name" : "Cairo" - }, - "event" : { - "dataset" : "sample_ecommerce" - } - } - ... - } - ] - } - } - } -} -``` - -## scripted_metric - -The `scripted_metric` metric is a multi-value metric aggregation that returns metrics calculated from a specified script. - -A script has four stages: the initial stage, the map stage, the combine stage, and the reduce stage. - -* `init_script`: (OPTIONAL) Sets the initial state and executes before any collection of documents. -* `map_script`: Checks the value of the `type` field and executes the aggregation on the collected documents. -* `combine_script`: Aggregates the state returned from every shard. The aggregated value is returned to the coordinating node. -* `reduce_script`: Provides access to the variable states; this variable combines the results from the `combine_script` on each shard into an array. 
- -The following example aggregates the different HTTP response types in web log data: - -```json -GET opensearch_dashboards_sample_data_logs/_search -{ - "size": 0, - "aggregations": { - "responses.counts": { - "scripted_metric": { - "init_script": "state.responses = ['error':0L,'success':0L,'other':0L]", - "map_script": """ - def code = doc['response.keyword'].value; - if (code.startsWith('5') || code.startsWith('4')) { - state.responses.error += 1 ; - } else if(code.startsWith('2')) { - state.responses.success += 1; - } else { - state.responses.other += 1; - } - """, - "combine_script": "state.responses", - "reduce_script": """ - def counts = ['error': 0L, 'success': 0L, 'other': 0L]; - for (responses in states) { - counts.error += responses['error']; - counts.success += responses['success']; - counts.other += responses['other']; - } - return counts; - """ - } - } - } -} -``` - -#### Example response - -```json -... -"aggregations" : { - "responses.counts" : { - "value" : { - "other" : 0, - "success" : 12832, - "error" : 1242 - } - } - } -} -``` diff --git a/_query-dsl/aggregations/metric/average.md b/_query-dsl/aggregations/metric/average.md new file mode 100644 index 0000000000..b14a810551 --- /dev/null +++ b/_query-dsl/aggregations/metric/average.md @@ -0,0 +1,56 @@ +--- +layout: default +title: Average +parent: Metric aggregations +grand_parent: Aggregations +nav_order: 10 +--- + +# Average aggregations + +The `avg` metric is a single-value metric aggregation that returns the average value of a field. 
+ +The following example calculates the average of the `taxful_total_price` field: + +```json +GET opensearch_dashboards_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "avg_taxful_total_price": { + "avg": { + "field": "taxful_total_price" + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +{ + "took": 85, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4675, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "avg_taxful_total_price": { + "value": 75.05542864304813 + } + } +} +``` \ No newline at end of file diff --git a/_query-dsl/aggregations/metric/cardinality.md b/_query-dsl/aggregations/metric/cardinality.md new file mode 100644 index 0000000000..4f816b87dd --- /dev/null +++ b/_query-dsl/aggregations/metric/cardinality.md @@ -0,0 +1,60 @@ +--- +layout: default +title: Cardinality +parent: Metric aggregations +grand_parent: Aggregations +nav_order: 20 +--- + +# Cardinality aggregations + +The `cardinality` metric is a single-value metric aggregation that counts the number of unique or distinct values of a field. + +The following example finds the number of unique products in an eCommerce store: + +```json +GET opensearch_dashboards_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "unique_products": { + "cardinality": { + "field": "products.product_id" + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +... + "aggregations" : { + "unique_products" : { + "value" : 7033 + } + } +} +``` + +Cardinality count is approximate. +If you have tens of thousands of products in your hypothetical store, an accurate cardinality calculation requires loading all the values into a hash set and returning its size. This approach doesn't scale well; it requires huge amounts of memory and can cause high latencies. 
+ +You can control the trade-off between memory and accuracy with the `precision_threshold` setting. This setting defines the threshold below which counts are expected to be close to accurate. Above this value, counts might become a bit less accurate. The default value of `precision_threshold` is 3,000. The maximum supported value is 40,000. + +```json +GET opensearch_dashboards_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "unique_products": { + "cardinality": { + "field": "products.product_id", + "precision_threshold": 10000 + } + } + } +} +``` \ No newline at end of file diff --git a/_query-dsl/aggregations/metric/extended-stats.md b/_query-dsl/aggregations/metric/extended-stats.md new file mode 100644 index 0000000000..3ef2ed0325 --- /dev/null +++ b/_query-dsl/aggregations/metric/extended-stats.md @@ -0,0 +1,75 @@ +--- +layout: default +title: Extended stats +parent: Metric aggregations +grand_parent: Aggregations +nav_order: 30 +--- + +# Extended stats aggregations + +The `extended_stats` aggregation is an extended version of the [`stats`]({{site.url}}{{site.baseurl}}/query-dsl/aggregations/metric/stats/) aggregation. Apart from including basic stats, `extended_stats` also returns stats such as `sum_of_squares`, `variance`, and `std_deviation`. +The following example returns extended stats for `taxful_total_price`: +```json +GET opensearch_dashboards_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "extended_stats_taxful_total_price": { + "extended_stats": { + "field": "taxful_total_price" + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +... 
+"aggregations" : { + "extended_stats_taxful_total_price" : { + "count" : 4675, + "min" : 6.98828125, + "max" : 2250.0, + "avg" : 75.05542864304813, + "sum" : 350884.12890625, + "sum_of_squares" : 3.9367749294174194E7, + "variance" : 2787.59157113862, + "variance_population" : 2787.59157113862, + "variance_sampling" : 2788.187974983536, + "std_deviation" : 52.79764740155209, + "std_deviation_population" : 52.79764740155209, + "std_deviation_sampling" : 52.80329511482722, + "std_deviation_bounds" : { + "upper" : 180.6507234461523, + "lower" : -30.53986616005605, + "upper_population" : 180.6507234461523, + "lower_population" : -30.53986616005605, + "upper_sampling" : 180.66201887270256, + "lower_sampling" : -30.551161586606312 + } + } + } +} +``` + +The `std_deviation_bounds` object provides a visual variance of the data with an interval of plus/minus two standard deviations from the mean. +To use a different number of standard deviations for the bounds, for example, 3, set `sigma` to 3: + +```json +GET opensearch_dashboards_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "extended_stats_taxful_total_price": { + "extended_stats": { + "field": "taxful_total_price", + "sigma": 3 + } + } + } +} +``` \ No newline at end of file diff --git a/_query-dsl/aggregations/metric/geobounds.md b/_query-dsl/aggregations/metric/geobounds.md new file mode 100644 index 0000000000..f78ece2397 --- /dev/null +++ b/_query-dsl/aggregations/metric/geobounds.md @@ -0,0 +1,48 @@ +--- +layout: default +title: Geobounds +parent: Metric aggregations +grand_parent: Aggregations +nav_order: 40 +--- + +# Geobounds aggregations + +The `geo_bounds` metric is a multi-value metric aggregation that calculates the bounding box in terms of latitude and longitude around a `geo_point` field. 
+ +The following example returns the `geo_bounds` metrics for the `geoip.location` field: + +```json +GET opensearch_dashboards_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "geo": { + "geo_bounds": { + "field": "geoip.location" + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +"aggregations" : { + "geo" : { + "bounds" : { + "top_left" : { + "lat" : 52.49999997206032, + "lon" : -118.20000001229346 + }, + "bottom_right" : { + "lat" : 4.599999985657632, + "lon" : 55.299999956041574 + } + } + } + } +} +``` \ No newline at end of file diff --git a/_query-dsl/aggregations/metric/index.md b/_query-dsl/aggregations/metric/index.md new file mode 100644 index 0000000000..8d023835b3 --- /dev/null +++ b/_query-dsl/aggregations/metric/index.md @@ -0,0 +1,28 @@ +--- +layout: default +title: Metric aggregations +parent: Aggregations +has_children: true +has_toc: true +nav_order: 2 +redirect_from: + - /opensearch/metric-agg/ + - /query-dsl/aggregations/metric-agg/ + - /aggregations/metric-agg/ +--- + +# Metric aggregations + +Metric aggregations let you perform simple calculations such as finding the minimum, maximum, and average values of a field. + +## Types of metric aggregations + +Metric aggregations are of two types: single-value metric aggregations and multi-value metric aggregations. + +### Single-value metric aggregations + +Single-value metric aggregations return a single metric, for example, `sum`, `min`, `max`, `avg`, `cardinality`, or `value_count`. + +### Multi-value metric aggregations + +Multi-value metric aggregations return more than one metric, for example, `stats`, `extended_stats`, `matrix_stats`, `percentiles`, `percentile_ranks`, `geo_bounds`, `top_hits`, or `scripted_metric`. 
diff --git a/_query-dsl/aggregations/metric/matrix-stats.md b/_query-dsl/aggregations/metric/matrix-stats.md new file mode 100644 index 0000000000..8c2800ecad --- /dev/null +++ b/_query-dsl/aggregations/metric/matrix-stats.md @@ -0,0 +1,85 @@ +--- +layout: default +title: Matrix stats +parent: Metric aggregations +grand_parent: Aggregations +nav_order: 50 +--- + +# Matrix stats aggregations + +The `matrix_stats` aggregation generates advanced stats for multiple fields in a matrix form. +The following example returns advanced stats in a matrix form for the `taxful_total_price` and `products.base_price` fields: + +```json +GET opensearch_dashboards_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "matrix_stats_taxful_total_price": { + "matrix_stats": { + "fields": ["taxful_total_price", "products.base_price"] + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +... +"aggregations" : { + "matrix_stats_taxful_total_price" : { + "doc_count" : 4675, + "fields" : [ + { + "name" : "products.base_price", + "count" : 4675, + "mean" : 34.994239430147196, + "variance" : 360.5035285833703, + "skewness" : 5.530161335032702, + "kurtosis" : 131.16306324042148, + "covariance" : { + "products.base_price" : 360.5035285833703, + "taxful_total_price" : 846.6489362233166 + }, + "correlation" : { + "products.base_price" : 1.0, + "taxful_total_price" : 0.8444765264325268 + } + }, + { + "name" : "taxful_total_price", + "count" : 4675, + "mean" : 75.05542864304839, + "variance" : 2788.1879749835402, + "skewness" : 15.812149139924037, + "kurtosis" : 619.1235507385902, + "covariance" : { + "products.base_price" : 846.6489362233166, + "taxful_total_price" : 2788.1879749835402 + }, + "correlation" : { + "products.base_price" : 0.8444765264325268, + "taxful_total_price" : 1.0 + } + } + ] + } + } +} +``` + +The following table lists all response fields. + +Statistic | Description +:--- | :--- +`count` | The number of samples measured. 
+`mean` | The average value of the field measured from the sample. +`variance` | How far the measured values of the field are spread out from the mean value. The larger the variance, the more spread out the values are from the mean. +`skewness` | A measure of the asymmetry of the distribution of the field's values around the mean. +`kurtosis` | A measure of the tail heaviness of a distribution. As the tail becomes lighter, kurtosis decreases. As the tail becomes heavier, kurtosis increases. To learn about kurtosis, see [Wikipedia](https://en.wikipedia.org/wiki/Kurtosis). +`covariance` | A measure of the joint variability between two fields. A positive value means that their values move in the same direction; a negative value means that they move in opposite directions. +`correlation` | A measure of the strength of the relationship between two fields. Valid values are in the range [-1, 1]. A value of -1 means that the fields are negatively correlated and a value of 1 means that they are positively correlated. A value of 0 means that there's no identifiable relationship between them. \ No newline at end of file diff --git a/_query-dsl/aggregations/metric/maximum.md b/_query-dsl/aggregations/metric/maximum.md new file mode 100644 index 0000000000..7f8967f47f --- /dev/null +++ b/_query-dsl/aggregations/metric/maximum.md @@ -0,0 +1,56 @@ +--- +layout: default +title: Maximum +parent: Metric aggregations +grand_parent: Aggregations +nav_order: 60 +--- + +# Maximum aggregations + +The `max` metric is a single-value metric aggregation that returns the maximum value of a field. 
+ +The following example calculates the maximum of the `taxful_total_price` field: + +```json +GET opensearch_dashboards_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "max_taxful_total_price": { + "max": { + "field": "taxful_total_price" + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +{ + "took": 17, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4675, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "max_taxful_total_price": { + "value": 2250 + } + } +} +``` \ No newline at end of file diff --git a/_query-dsl/aggregations/metric/minimum.md b/_query-dsl/aggregations/metric/minimum.md new file mode 100644 index 0000000000..65c51d25a8 --- /dev/null +++ b/_query-dsl/aggregations/metric/minimum.md @@ -0,0 +1,56 @@ +--- +layout: default +title: Minimum +parent: Metric aggregations +grand_parent: Aggregations +nav_order: 70 +--- + +# Minimum aggregations + +The `min` metric is a single-value metric aggregation that returns the minimum value of a field. 
+ +The following example calculates the minimum of the `taxful_total_price` field: + +```json +GET opensearch_dashboards_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "min_taxful_total_price": { + "min": { + "field": "taxful_total_price" + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +{ + "took": 13, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4675, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "min_taxful_total_price": { + "value": 6.98828125 + } + } +} +``` \ No newline at end of file diff --git a/_query-dsl/aggregations/metric/percentile-ranks.md b/_query-dsl/aggregations/metric/percentile-ranks.md new file mode 100644 index 0000000000..9bd1c382b1 --- /dev/null +++ b/_query-dsl/aggregations/metric/percentile-ranks.md @@ -0,0 +1,45 @@ +--- +layout: default +title: Percentile ranks +parent: Metric aggregations +grand_parent: Aggregations +nav_order: 80 +--- + +# Percentile rank aggregations + +Percentile rank is the percentile of values at or below a threshold grouped by a specified value. For example, if a value is greater than or equal to 80% of the values, it has a percentile rank of 80. + +```json +GET opensearch_dashboards_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "percentile_rank_taxful_total_price": { + "percentile_ranks": { + "field": "taxful_total_price", + "values": [ + 10, + 15 + ] + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +... 
+"aggregations" : { + "percentile_rank_taxful_total_price" : { + "values" : { + "10.0" : 0.055096056411283456, + "15.0" : 0.0830092961834656 + } + } + } +} +``` \ No newline at end of file diff --git a/_query-dsl/aggregations/metric/percentile.md b/_query-dsl/aggregations/metric/percentile.md new file mode 100644 index 0000000000..f3f890b908 --- /dev/null +++ b/_query-dsl/aggregations/metric/percentile.md @@ -0,0 +1,52 @@ +--- +layout: default +title: Percentile +parent: Metric aggregations +grand_parent: Aggregations +nav_order: 90 +--- + +# Percentile aggregations + +A percentile is the value at or below which a given percentage of the data falls. For example, the 95th percentile is the value that is greater than or equal to 95% of the values. + +The `percentiles` metric is a multi-value metric aggregation that lets you find outliers in your data or figure out the distribution of your data. + +Like the `cardinality` metric, the `percentiles` metric is also approximate. + +The following example calculates the default percentiles of the `taxful_total_price` field: + +```json +GET opensearch_dashboards_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "percentile_taxful_total_price": { + "percentiles": { + "field": "taxful_total_price" + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +... 
+"aggregations" : { + "percentile_taxful_total_price" : { + "values" : { + "1.0" : 21.984375, + "5.0" : 27.984375, + "25.0" : 44.96875, + "50.0" : 64.22061688311689, + "75.0" : 93.0, + "95.0" : 156.0, + "99.0" : 222.0 + } + } + } +} +``` diff --git a/_query-dsl/aggregations/metric/scripted-metric.md b/_query-dsl/aggregations/metric/scripted-metric.md new file mode 100644 index 0000000000..f66c03616a --- /dev/null +++ b/_query-dsl/aggregations/metric/scripted-metric.md @@ -0,0 +1,71 @@ +--- +layout: default +title: Scripted metric +parent: Metric aggregations +grand_parent: Aggregations +nav_order: 100 +--- + +# Scripted metric aggregations + +The `scripted_metric` metric is a multi-value metric aggregation that returns metrics calculated from a specified script. + +A script has four stages: the initial stage, the map stage, the combine stage, and the reduce stage. + +* `init_script`: (OPTIONAL) Sets the initial state and executes before any collection of documents. +* `map_script`: Checks the value of the `type` field and executes the aggregation on the collected documents. +* `combine_script`: Aggregates the state returned from every shard. The aggregated value is returned to the coordinating node. +* `reduce_script`: Provides access to the variable states; this variable combines the results from the `combine_script` on each shard into an array. 
+ +The following example aggregates the different HTTP response types in web log data: + +```json +GET opensearch_dashboards_sample_data_logs/_search +{ + "size": 0, + "aggregations": { + "responses.counts": { + "scripted_metric": { + "init_script": "state.responses = ['error':0L,'success':0L,'other':0L]", + "map_script": """ + def code = doc['response.keyword'].value; + if (code.startsWith('5') || code.startsWith('4')) { + state.responses.error += 1 ; + } else if(code.startsWith('2')) { + state.responses.success += 1; + } else { + state.responses.other += 1; + } + """, + "combine_script": "state.responses", + "reduce_script": """ + def counts = ['error': 0L, 'success': 0L, 'other': 0L]; + for (responses in states) { + counts.error += responses['error']; + counts.success += responses['success']; + counts.other += responses['other']; + } + return counts; + """ + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +... +"aggregations" : { + "responses.counts" : { + "value" : { + "other" : 0, + "success" : 12832, + "error" : 1242 + } + } + } +} +``` diff --git a/_query-dsl/aggregations/metric/stats.md b/_query-dsl/aggregations/metric/stats.md new file mode 100644 index 0000000000..4134b324ce --- /dev/null +++ b/_query-dsl/aggregations/metric/stats.md @@ -0,0 +1,44 @@ +--- +layout: default +title: Stats aggregations +parent: Metric aggregations +grand_parent: Aggregations +nav_order: 110 +--- + +# Stats aggregations + +The `stats` metric is a multi-value metric aggregation that returns all basic metrics such as `min`, `max`, `sum`, `avg`, and `value_count` in one aggregation query. + +The following example returns the basic stats for the `taxful_total_price` field: + +```json +GET opensearch_dashboards_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "stats_taxful_total_price": { + "stats": { + "field": "taxful_total_price" + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +... 
+"aggregations" : { + "stats_taxful_total_price" : { + "count" : 4675, + "min" : 6.98828125, + "max" : 2250.0, + "avg" : 75.05542864304813, + "sum" : 350884.12890625 + } + } +} +``` \ No newline at end of file diff --git a/_query-dsl/aggregations/metric/sum.md b/_query-dsl/aggregations/metric/sum.md new file mode 100644 index 0000000000..a2115a7219 --- /dev/null +++ b/_query-dsl/aggregations/metric/sum.md @@ -0,0 +1,56 @@ +--- +layout: default +title: Sum +parent: Metric aggregations +grand_parent: Aggregations +nav_order: 120 +--- + +# Sum aggregations + +The `sum` metric is a single-value metric aggregation that returns the sum of the values of a field. + +The following example calculates the sum of the `taxful_total_price` field: + +```json +GET opensearch_dashboards_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "sum_taxful_total_price": { + "sum": { + "field": "taxful_total_price" + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +{ + "took": 16, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4675, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "sum_taxful_total_price": { + "value": 350884.12890625 + } + } +} +``` diff --git a/_query-dsl/aggregations/metric/top-hits.md b/_query-dsl/aggregations/metric/top-hits.md new file mode 100644 index 0000000000..f0845ebac7 --- /dev/null +++ b/_query-dsl/aggregations/metric/top-hits.md @@ -0,0 +1,147 @@ +--- +layout: default +title: Top hits +parent: Metric aggregations +grand_parent: Aggregations +nav_order: 130 +--- + +# Top hits aggregations + +The `top_hits` metric is a multi-value metric aggregation that ranks the matching documents based on a relevance score for the field that's being aggregated. + +You can specify the following options: + +- `from`: The starting position of the hit. +- `size`: The maximum number of hits to return. 
The default value is 3. +- `sort`: How the matching hits are sorted. By default, the hits are sorted by the relevance score of the aggregation query. + +The following example returns the top 5 products in your eCommerce data: + +```json +GET opensearch_dashboards_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "top_hits_products": { + "top_hits": { + "size": 5 + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +... +"aggregations" : { + "top_hits_products" : { + "hits" : { + "total" : { + "value" : 4675, + "relation" : "eq" + }, + "max_score" : 1.0, + "hits" : [ + { + "_index" : "opensearch_dashboards_sample_data_ecommerce", + "_type" : "_doc", + "_id" : "glMlwXcBQVLeQPrkHPtI", + "_score" : 1.0, + "_source" : { + "category" : [ + "Women's Accessories", + "Women's Clothing" + ], + "currency" : "EUR", + "customer_first_name" : "rania", + "customer_full_name" : "rania Evans", + "customer_gender" : "FEMALE", + "customer_id" : 24, + "customer_last_name" : "Evans", + "customer_phone" : "", + "day_of_week" : "Sunday", + "day_of_week_i" : 6, + "email" : "rania@evans-family.zzz", + "manufacturer" : [ + "Tigress Enterprises" + ], + "order_date" : "2021-02-28T14:16:48+00:00", + "order_id" : 583581, + "products" : [ + { + "base_price" : 10.99, + "discount_percentage" : 0, + "quantity" : 1, + "manufacturer" : "Tigress Enterprises", + "tax_amount" : 0, + "product_id" : 19024, + "category" : "Women's Accessories", + "sku" : "ZO0082400824", + "taxless_price" : 10.99, + "unit_discount_amount" : 0, + "min_price" : 5.17, + "_id" : "sold_product_583581_19024", + "discount_amount" : 0, + "created_on" : "2016-12-25T14:16:48+00:00", + "product_name" : "Snood - white/grey/peach", + "price" : 10.99, + "taxful_price" : 10.99, + "base_unit_price" : 10.99 + }, + { + "base_price" : 32.99, + "discount_percentage" : 0, + "quantity" : 1, + "manufacturer" : "Tigress Enterprises", + "tax_amount" : 0, + "product_id" : 19260, + "category" : "Women's 
Clothing", + "sku" : "ZO0071900719", + "taxless_price" : 32.99, + "unit_discount_amount" : 0, + "min_price" : 17.15, + "_id" : "sold_product_583581_19260", + "discount_amount" : 0, + "created_on" : "2016-12-25T14:16:48+00:00", + "product_name" : "Cardigan - grey", + "price" : 32.99, + "taxful_price" : 32.99, + "base_unit_price" : 32.99 + } + ], + "sku" : [ + "ZO0082400824", + "ZO0071900719" + ], + "taxful_total_price" : 43.98, + "taxless_total_price" : 43.98, + "total_quantity" : 2, + "total_unique_products" : 2, + "type" : "order", + "user" : "rani", + "geoip" : { + "country_iso_code" : "EG", + "location" : { + "lon" : 31.3, + "lat" : 30.1 + }, + "region_name" : "Cairo Governorate", + "continent_name" : "Africa", + "city_name" : "Cairo" + }, + "event" : { + "dataset" : "sample_ecommerce" + } + } + ... + } + ] + } + } + } +} +``` \ No newline at end of file diff --git a/_query-dsl/aggregations/metric/value-count.md b/_query-dsl/aggregations/metric/value-count.md new file mode 100644 index 0000000000..9999d66553 --- /dev/null +++ b/_query-dsl/aggregations/metric/value-count.md @@ -0,0 +1,40 @@ +--- +layout: default +title: Value count +parent: Metric aggregations +grand_parent: Aggregations +nav_order: 140 +--- + +# Value count aggregations + +The `value_count` metric is a single-value metric aggregation that calculates the number of values that an aggregation is based on. + +For example, you can use the `value_count` metric with the `avg` metric to find how many numbers the aggregation uses to calculate an average value. + +```json +GET opensearch_dashboards_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "number_of_values": { + "value_count": { + "field": "taxful_total_price" + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +... + "aggregations" : { + "number_of_values" : { + "value" : 4675 + } + } +} +``` \ No newline at end of file