From dac646011f6c630f291fd10c1609dc3206480943 Mon Sep 17 00:00:00 2001 From: Bharathwaj G Date: Fri, 23 Aug 2024 19:48:16 +0530 Subject: [PATCH] [Star tree] Changes to handle derived metrics such as avg as part of star tree mapping (#15152) --------- Signed-off-by: Bharathwaj G --- .../index/mapper/StarTreeMapperIT.java | 20 +- .../compositeindex/datacube/MetricStat.java | 27 ++- .../startree/StarTreeIndexSettings.java | 13 +- .../startree/builder/BaseStarTreeBuilder.java | 3 + .../index/mapper/StarTreeMapper.java | 48 ++++- .../index/mapper/StarTreeMapperTests.java | 183 ++++++++++++++++-- 6 files changed, 247 insertions(+), 47 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java b/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java index 6f5b4bba481dd..c461f83657340 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java @@ -265,13 +265,9 @@ public void testValidCompositeIndex() { assertEquals(expectedTimeUnits, dateDim.getIntervals()); assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField()); assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField()); - List expectedMetrics = Arrays.asList( - MetricStat.AVG, - MetricStat.VALUE_COUNT, - MetricStat.SUM, - MetricStat.MAX, - MetricStat.MIN - ); + + // Assert default metrics + List expectedMetrics = Arrays.asList(MetricStat.VALUE_COUNT, MetricStat.SUM, MetricStat.AVG); assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics()); assertEquals(10000, starTreeFieldType.getStarTreeConfig().maxLeafDocs()); assertEquals( @@ -349,13 +345,9 @@ public void testUpdateIndexWhenMappingIsSame() { assertEquals(expectedTimeUnits, dateDim.getIntervals()); assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField()); 
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField()); - List expectedMetrics = Arrays.asList( - MetricStat.AVG, - MetricStat.VALUE_COUNT, - MetricStat.SUM, - MetricStat.MAX, - MetricStat.MIN - ); + + // Assert default metrics + List expectedMetrics = Arrays.asList(MetricStat.VALUE_COUNT, MetricStat.SUM, MetricStat.AVG); assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics()); assertEquals(10000, starTreeFieldType.getStarTreeConfig().maxLeafDocs()); assertEquals( diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java index df3b2229d2c5b..84eaaeb637962 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java @@ -10,6 +10,9 @@ import org.opensearch.common.annotation.ExperimentalApi; +import java.util.Arrays; +import java.util.List; + /** * Supported metric types for composite index * @@ -18,21 +21,39 @@ @ExperimentalApi public enum MetricStat { VALUE_COUNT("value_count"), - AVG("avg"), SUM("sum"), MIN("min"), - MAX("max"); + MAX("max"), + AVG("avg", VALUE_COUNT, SUM); private final String typeName; + private final MetricStat[] baseMetrics; - MetricStat(String typeName) { + MetricStat(String typeName, MetricStat... 
baseMetrics) { this.typeName = typeName; + this.baseMetrics = baseMetrics; } public String getTypeName() { return typeName; } + /** + * Return the list of metrics that this metric is derived from + * For example, AVG is derived from COUNT and SUM + */ + public List getBaseMetrics() { + return Arrays.asList(baseMetrics); + } + + /** + * Return true if this metric is derived from other metrics + * For example, AVG is derived from COUNT and SUM + */ + public boolean isDerivedMetric() { + return baseMetrics != null && baseMetrics.length > 0; + } + public static MetricStat fromTypeName(String typeName) { for (MetricStat metric : MetricStat.values()) { if (metric.getTypeName().equalsIgnoreCase(typeName)) { diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeIndexSettings.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeIndexSettings.java index 6535f8ed11da3..ce389a99b3626 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeIndexSettings.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeIndexSettings.java @@ -15,6 +15,7 @@ import java.util.Arrays; import java.util.List; +import java.util.function.Function; /** * Index settings for star tree fields. 
The settings are final as right now @@ -93,16 +94,10 @@ public class StarTreeIndexSettings { /** * Default metrics for metrics as part of star tree fields */ - public static final Setting> DEFAULT_METRICS_LIST = Setting.listSetting( + public static final Setting> DEFAULT_METRICS_LIST = Setting.listSetting( "index.composite_index.star_tree.field.default.metrics", - Arrays.asList( - MetricStat.AVG.toString(), - MetricStat.VALUE_COUNT.toString(), - MetricStat.SUM.toString(), - MetricStat.MAX.toString(), - MetricStat.MIN.toString() - ), - MetricStat::fromTypeName, + Arrays.asList(MetricStat.VALUE_COUNT.toString(), MetricStat.SUM.toString()), + Function.identity(), Setting.Property.IndexScope, Setting.Property.Final ); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index 90b2d0727d572..3fc8d24e6e0d2 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -118,6 +118,9 @@ public List generateMetricAggregatorInfos(MapperService ma List metricAggregatorInfos = new ArrayList<>(); for (Metric metric : this.starTreeField.getMetrics()) { for (MetricStat metricStat : metric.getMetrics()) { + if (metricStat.isDerivedMetric()) { + continue; + } IndexNumericFieldData.NumericType numericType; Mapper fieldMapper = mapperService.documentMapper().mappers().getMapper(metric.getField()); if (fieldMapper instanceof NumberFieldMapper) { diff --git a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java index d9539f9dc0c82..93764e93ae30d 100644 --- a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java +++ 
b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java @@ -28,6 +28,7 @@ import java.util.Locale; import java.util.Map; import java.util.Optional; +import java.util.Queue; import java.util.Set; import java.util.stream.Collectors; @@ -262,17 +263,50 @@ private Metric getMetric(String name, Map metric, Mapper.TypePar .collect(Collectors.toList()); metric.remove(STATS); if (metricStrings.isEmpty()) { - metricTypes = new ArrayList<>(StarTreeIndexSettings.DEFAULT_METRICS_LIST.get(context.getSettings())); - } else { - Set metricSet = new LinkedHashSet<>(); - for (String metricString : metricStrings) { - metricSet.add(MetricStat.fromTypeName(metricString)); - } - metricTypes = new ArrayList<>(metricSet); + metricStrings = new ArrayList<>(StarTreeIndexSettings.DEFAULT_METRICS_LIST.get(context.getSettings())); + } + // Add all required metrics initially + Set metricSet = new LinkedHashSet<>(); + for (String metricString : metricStrings) { + MetricStat metricStat = MetricStat.fromTypeName(metricString); + metricSet.add(metricStat); + addBaseMetrics(metricStat, metricSet); } + addEligibleDerivedMetrics(metricSet); + metricTypes = new ArrayList<>(metricSet); return new Metric(name, metricTypes); } + /** + * Add base metrics of derived metric to metric set + */ + private void addBaseMetrics(MetricStat metricStat, Set metricSet) { + if (metricStat.isDerivedMetric()) { + Queue metricQueue = new LinkedList<>(metricStat.getBaseMetrics()); + while (metricQueue.isEmpty() == false) { + MetricStat metric = metricQueue.poll(); + if (metric.isDerivedMetric() && !metricSet.contains(metric)) { + metricQueue.addAll(metric.getBaseMetrics()); + } + metricSet.add(metric); + } + } + } + + /** + * Add derived metrics if all associated base metrics are present + */ + private void addEligibleDerivedMetrics(Set metricStats) { + for (MetricStat metric : MetricStat.values()) { + if (metric.isDerivedMetric() && !metricStats.contains(metric)) { + List sourceMetrics = 
metric.getBaseMetrics(); + if (metricStats.containsAll(sourceMetrics)) { + metricStats.add(metric); + } + } + } + } + @Override protected List> getParameters() { return List.of(config); diff --git a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java index 3fa97825cdfc6..6b3b87da89915 100644 --- a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java @@ -52,7 +52,7 @@ public void teardown() { } public void testValidStarTree() throws IOException { - MapperService mapperService = createMapperService(getExpandedMapping("status", "size")); + MapperService mapperService = createMapperService(getExpandedMappingWithJustAvg("status", "size")); Set compositeFieldTypes = mapperService.getCompositeFieldTypes(); for (CompositeMappedFieldType type : compositeFieldTypes) { StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) type; @@ -66,7 +66,65 @@ public void testValidStarTree() throws IOException { assertEquals(expectedTimeUnits, dateDim.getIntervals()); assertEquals("status", starTreeFieldType.getDimensions().get(1).getField()); assertEquals("size", starTreeFieldType.getMetrics().get(0).getField()); - List expectedMetrics = Arrays.asList(MetricStat.SUM, MetricStat.AVG); + + // Assert COUNT and SUM gets added when AVG is defined + List expectedMetrics = Arrays.asList(MetricStat.AVG, MetricStat.VALUE_COUNT, MetricStat.SUM); + assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics()); + assertEquals(100, starTreeFieldType.getStarTreeConfig().maxLeafDocs()); + assertEquals(StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode()); + assertEquals( + new HashSet<>(Arrays.asList("@timestamp", "status")), + starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims() + ); 
+ } + } + + public void testMetricsWithJustSum() throws IOException { + MapperService mapperService = createMapperService(getExpandedMappingWithJustSum("status", "size")); + Set compositeFieldTypes = mapperService.getCompositeFieldTypes(); + for (CompositeMappedFieldType type : compositeFieldTypes) { + StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) type; + assertEquals("@timestamp", starTreeFieldType.getDimensions().get(0).getField()); + assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension); + DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0); + List expectedTimeUnits = Arrays.asList( + Rounding.DateTimeUnit.DAY_OF_MONTH, + Rounding.DateTimeUnit.MONTH_OF_YEAR + ); + assertEquals(expectedTimeUnits, dateDim.getIntervals()); + assertEquals("status", starTreeFieldType.getDimensions().get(1).getField()); + assertEquals("size", starTreeFieldType.getMetrics().get(0).getField()); + + // Assert AVG is not added when only one of its base metrics (SUM) is present + List expectedMetrics = List.of(MetricStat.SUM); + assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics()); + assertEquals(100, starTreeFieldType.getStarTreeConfig().maxLeafDocs()); + assertEquals(StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode()); + assertEquals( + new HashSet<>(Arrays.asList("@timestamp", "status")), + starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims() + ); + } + } + + public void testMetricsWithCountAndSum() throws IOException { + MapperService mapperService = createMapperService(getExpandedMappingWithSumAndCount("status", "size")); + Set compositeFieldTypes = mapperService.getCompositeFieldTypes(); + for (CompositeMappedFieldType type : compositeFieldTypes) { + StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) type; + assertEquals("@timestamp", 
starTreeFieldType.getDimensions().get(0).getField()); + assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension); + DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0); + List expectedTimeUnits = Arrays.asList( + Rounding.DateTimeUnit.DAY_OF_MONTH, + Rounding.DateTimeUnit.MONTH_OF_YEAR + ); + assertEquals(expectedTimeUnits, dateDim.getIntervals()); + assertEquals("status", starTreeFieldType.getDimensions().get(1).getField()); + assertEquals("size", starTreeFieldType.getMetrics().get(0).getField()); + + // Assert AVG gets added when both of its base metrics is already present + List expectedMetrics = List.of(MetricStat.SUM, MetricStat.VALUE_COUNT, MetricStat.AVG); assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics()); assertEquals(100, starTreeFieldType.getStarTreeConfig().maxLeafDocs()); assertEquals(StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode()); @@ -92,13 +150,7 @@ public void testValidStarTreeDefaults() throws IOException { assertEquals(expectedTimeUnits, dateDim.getIntervals()); assertEquals("status", starTreeFieldType.getDimensions().get(1).getField()); assertEquals("status", starTreeFieldType.getMetrics().get(0).getField()); - List expectedMetrics = Arrays.asList( - MetricStat.AVG, - MetricStat.VALUE_COUNT, - MetricStat.SUM, - MetricStat.MAX, - MetricStat.MIN - ); + List expectedMetrics = Arrays.asList(MetricStat.VALUE_COUNT, MetricStat.SUM, MetricStat.AVG); assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics()); assertEquals(10000, starTreeFieldType.getStarTreeConfig().maxLeafDocs()); assertEquals(StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode()); @@ -109,7 +161,7 @@ public void testValidStarTreeDefaults() throws IOException { public void testInvalidDim() { MapperParsingException ex = expectThrows( MapperParsingException.class, - () -> 
createMapperService(getExpandedMapping("invalid", "size")) + () -> createMapperService(getExpandedMappingWithJustAvg("invalid", "size")) ); assertEquals("Failed to parse mapping [_doc]: unknown dimension field [invalid]", ex.getMessage()); } @@ -117,7 +169,7 @@ public void testInvalidDim() { public void testInvalidMetric() { MapperParsingException ex = expectThrows( MapperParsingException.class, - () -> createMapperService(getExpandedMapping("status", "invalid")) + () -> createMapperService(getExpandedMappingWithJustAvg("status", "invalid")) ); assertEquals("Failed to parse mapping [_doc]: unknown metric field [invalid]", ex.getMessage()); } @@ -232,6 +284,9 @@ public void testMetric() { assertEquals(MetricStat.MIN, MetricStat.fromTypeName("min")); assertEquals(MetricStat.SUM, MetricStat.fromTypeName("sum")); assertEquals(MetricStat.AVG, MetricStat.fromTypeName("avg")); + + assertEquals(List.of(MetricStat.VALUE_COUNT, MetricStat.SUM), MetricStat.AVG.getBaseMetrics()); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> MetricStat.fromTypeName("invalid")); assertEquals("Invalid metric stat: invalid", ex.getMessage()); } @@ -310,7 +365,7 @@ public void testStarTreeField() { } public void testValidations() throws IOException { - MapperService mapperService = createMapperService(getExpandedMapping("status", "size")); + MapperService mapperService = createMapperService(getExpandedMappingWithJustAvg("status", "size")); Settings settings = Settings.builder().put(CompositeIndexSettings.STAR_TREE_INDEX_ENABLED_SETTING.getKey(), true).build(); CompositeIndexSettings enabledCompositeIndexSettings = new CompositeIndexSettings( settings, @@ -370,7 +425,7 @@ public void testValidations() throws IOException { ); } - private XContentBuilder getExpandedMapping(String dim, String metric) throws IOException { + private XContentBuilder getExpandedMappingWithJustAvg(String dim, String metric) throws IOException { return topMapping(b -> { 
b.startObject("composite"); b.startObject("startree"); @@ -399,7 +454,6 @@ private XContentBuilder getExpandedMapping(String dim, String metric) throws IOE b.startObject(); b.field("name", metric); b.startArray("stats"); - b.value("sum"); b.value("avg"); b.endArray(); b.endObject(); @@ -421,6 +475,107 @@ private XContentBuilder getExpandedMapping(String dim, String metric) throws IOE }); } + private XContentBuilder getExpandedMappingWithJustSum(String dim, String metric) throws IOException { + return topMapping(b -> { + b.startObject("composite"); + b.startObject("startree"); + b.field("type", "star_tree"); + b.startObject("config"); + b.field("max_leaf_docs", 100); + b.startArray("skip_star_node_creation_for_dimensions"); + { + b.value("@timestamp"); + b.value("status"); + } + b.endArray(); + b.startArray("ordered_dimensions"); + b.startObject(); + b.field("name", "@timestamp"); + b.startArray("calendar_intervals"); + b.value("day"); + b.value("month"); + b.endArray(); + b.endObject(); + b.startObject(); + b.field("name", dim); + b.endObject(); + b.endArray(); + b.startArray("metrics"); + b.startObject(); + b.field("name", metric); + b.startArray("stats"); + b.value("sum"); + b.endArray(); + b.endObject(); + b.endArray(); + b.endObject(); + b.endObject(); + b.endObject(); + b.startObject("properties"); + b.startObject("@timestamp"); + b.field("type", "date"); + b.endObject(); + b.startObject("status"); + b.field("type", "integer"); + b.endObject(); + b.startObject("size"); + b.field("type", "integer"); + b.endObject(); + b.endObject(); + }); + } + + private XContentBuilder getExpandedMappingWithSumAndCount(String dim, String metric) throws IOException { + return topMapping(b -> { + b.startObject("composite"); + b.startObject("startree"); + b.field("type", "star_tree"); + b.startObject("config"); + b.field("max_leaf_docs", 100); + b.startArray("skip_star_node_creation_for_dimensions"); + { + b.value("@timestamp"); + b.value("status"); + } + b.endArray(); + 
b.startArray("ordered_dimensions"); + b.startObject(); + b.field("name", "@timestamp"); + b.startArray("calendar_intervals"); + b.value("day"); + b.value("month"); + b.endArray(); + b.endObject(); + b.startObject(); + b.field("name", dim); + b.endObject(); + b.endArray(); + b.startArray("metrics"); + b.startObject(); + b.field("name", metric); + b.startArray("stats"); + b.value("sum"); + b.value("value_count"); + b.endArray(); + b.endObject(); + b.endArray(); + b.endObject(); + b.endObject(); + b.endObject(); + b.startObject("properties"); + b.startObject("@timestamp"); + b.field("type", "date"); + b.endObject(); + b.startObject("status"); + b.field("type", "integer"); + b.endObject(); + b.startObject("size"); + b.field("type", "integer"); + b.endObject(); + b.endObject(); + }); + } + private XContentBuilder getMinMapping() throws IOException { return getMinMapping(false, false, false, false); }