Skip to content

Commit

Permalink
scaled float support
Browse files Browse the repository at this point in the history
Signed-off-by: Sarthak Aggarwal <[email protected]>
  • Loading branch information
sarthakaggarwal97 authored and bharath-techie committed Sep 4, 2024
1 parent 68d420a commit 3f8272b
Show file tree
Hide file tree
Showing 44 changed files with 590 additions and 496 deletions.
2 changes: 1 addition & 1 deletion distribution/src/config/opensearch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,4 +128,4 @@ ${path.logs}
#
# Gates the functionality of star tree index, which improves the performance of search aggregations.
#
#opensearch.experimental.feature.composite_index.star_tree.enabled: true
opensearch.experimental.feature.composite_index.star_tree.enabled: true
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import org.opensearch.common.xcontent.support.XContentMapValues;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.core.xcontent.XContentParser.Token;
import org.opensearch.index.compositeindex.datacube.DimensionType;
import org.opensearch.index.fielddata.FieldData;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.index.fielddata.IndexNumericFieldData;
Expand All @@ -71,10 +72,12 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Supplier;

/** A {@link FieldMapper} for scaled floats. Values are internally multiplied
 * by a scaling factor and rounded to the closest long.
 */
public class ScaledFloatFieldMapper extends ParametrizedFieldMapper {

public static final String CONTENT_TYPE = "scaled_float";
Expand Down Expand Up @@ -162,11 +165,21 @@ public ScaledFloatFieldMapper build(BuilderContext context) {
);
return new ScaledFloatFieldMapper(name, type, multiFieldsBuilder.build(this, context), copyTo.build(), this);
}

/**
 * Reports which data cube dimension type a field built by this builder can be used as.
 *
 * @return an {@link Optional} that always holds {@link DimensionType#NUMERIC} for this builder
 */
@Override
public Optional<DimensionType> getSupportedDataCubeDimensionType() {
return Optional.of(DimensionType.NUMERIC);
}

/**
 * Reports whether a field built by this builder is supported as a data cube metric.
 *
 * @return always {@code true} for this builder
 */
@Override
public boolean isDataCubeMetricSupported() {
return true;
}
}

public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getSettings()));

public static final class ScaledFloatFieldType extends SimpleMappedFieldType implements NumericPointEncoder {
public static final class ScaledFloatFieldType extends SimpleMappedFieldType implements NumericPointEncoder, FieldValueConverter {

private final double scalingFactor;
private final Double nullValue;
Expand Down Expand Up @@ -340,6 +353,12 @@ public DocValueFormat docValueFormat(String format, ZoneId timeZone) {
/**
 * Parses {@code input} and multiplies the result by the scaling factor.
 * The multiplication is carried out on {@link BigDecimal} operands (the parsed
 * value goes through its decimal string form first) and the product is then
 * converted back to a {@code double}.
 *
 * @param input the raw value handed to {@code parse}
 * @return the parsed value times the scaling factor
 */
private double scale(Object input) {
    String parsedAsText = Double.toString(parse(input));
    BigDecimal parsedValue = new BigDecimal(parsedAsText);
    BigDecimal factor = BigDecimal.valueOf(scalingFactor);
    return parsedValue.multiply(factor).doubleValue();
}

/**
 * Converts a scaled long back to a double by multiplying it with the
 * reciprocal of the scaling factor.
 *
 * @param value the scaled long value
 * @return {@code value * (1 / scalingFactor)}
 */
@Override
public double toDoubleValue(long value) {
    return value * (1d / scalingFactor);
}
}

private final Explicit<Boolean> ignoreMalformed;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,24 @@

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.FeatureFlags;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.core.xcontent.MediaTypeRegistry;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings;
import org.opensearch.plugins.Plugin;
import org.junit.AfterClass;
import org.junit.BeforeClass;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;

import static java.util.Collections.singletonList;
import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX;
import static org.hamcrest.Matchers.containsString;

public class ScaledFloatFieldMapperTests extends MapperTestCase {
Expand Down Expand Up @@ -91,24 +97,112 @@ public void testExistsQueryDocValuesDisabled() throws IOException {
assertParseMinimalWarnings();
}

public void testDefaults() throws Exception {
XContentBuilder mapping = fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", 10.0));
/** Initializes the feature flags with the star tree index flag set to {@code "true"} before the tests of this class run. */
@BeforeClass
public static void createMapper() {
    Settings starTreeEnabled = Settings.builder().put(STAR_TREE_INDEX, "true").build();
    FeatureFlags.initializeFeatureFlags(starTreeEnabled);
}

/** Re-initializes the feature flags from empty settings once the tests of this class have run. */
@AfterClass
public static void clearMapper() {
    Settings noOverrides = Settings.EMPTY;
    FeatureFlags.initializeFeatureFlags(noOverrides);
}

/**
 * Builds a mapping with three {@code scaled_float} fields that are referenced by a
 * star tree (two as ordered dimensions, one as a metric), checks that the mapping
 * round-trips through the document mapper, then indexes one document and verifies
 * the point and doc-values fields produced for each scaled float.
 */
public void testScaledFloatWithStarTree() throws Exception {
    double scalingFactorField1 = randomDouble() * 100;
    double scalingFactorField2 = randomDouble() * 100;
    double scalingFactorField3 = randomDouble() * 100;

    XContentBuilder mapping = getStarTreeMappingWithScaledFloat(scalingFactorField1, scalingFactorField2, scalingFactorField3);
    DocumentMapper mapper = createDocumentMapper(mapping);
    assertEquals(mapping.toString(), mapper.mappingSource().toString());
    assertTrue(mapping.toString().contains("startree"));

    long randomLongField1 = randomLong();
    long randomLongField2 = randomLong();
    long randomLongField3 = randomLong();
    // Only one document is parsed; a second `doc` declaration (and a lookup of the
    // unmapped field "field") would not compile and is not part of this test.
    ParsedDocument doc = mapper.parse(
        source(b -> b.field("field1", randomLongField1).field("field2", randomLongField2).field("field3", randomLongField3))
    );
    validateScaledFloatFields(doc, "field1", randomLongField1, scalingFactorField1);
    validateScaledFloatFields(doc, "field2", randomLongField2, scalingFactorField2);
    validateScaledFloatFields(doc, "field3", randomLongField3, scalingFactorField3);
}

/**
 * Index settings used by this test class: the composite index setting is set to
 * {@code true} first, then the parent's index settings are applied on the same builder.
 */
@Override
protected Settings getIndexSettings() {
    Settings.Builder settings = Settings.builder();
    settings.put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true);
    settings.put(super.getIndexSettings());
    return settings.build();
}

/**
 * Asserts that {@code field} was indexed as exactly two Lucene fields - a
 * one-dimensional point followed by a SORTED_NUMERIC doc value - neither of them
 * stored, and both carrying the scaled representation of {@code value}.
 *
 * @param doc           the parsed document to inspect
 * @param field         name of the scaled float field
 * @param value         the raw value that was indexed
 * @param scalingFactor the scaling factor configured for {@code field}
 */
private static void validateScaledFloatFields(ParsedDocument doc, String field, long value, double scalingFactor) {
    // Scaled floats are multiplied by the scaling factor and rounded to the closest
    // long, so the expectation must round rather than truncate, and must be derived
    // from the arguments instead of a fixed literal.
    long expectedScaledValue = Math.round(value * scalingFactor);

    IndexableField[] fields = doc.rootDoc().getFields(field);
    assertEquals(2, fields.length);

    IndexableField pointField = fields[0];
    assertEquals(1, pointField.fieldType().pointDimensionCount());
    assertFalse(pointField.fieldType().stored());
    assertEquals(expectedScaledValue, pointField.numericValue().longValue());

    IndexableField dvField = fields[1];
    assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
    assertEquals(expectedScaledValue, dvField.numericValue().longValue());
    assertFalse(dvField.fieldType().stored());
}

/**
 * Builds a top-level mapping that declares a star tree named {@code startree} under
 * {@code composite} and three {@code scaled_float} properties.
 * {@code field1} and {@code field2} are the ordered dimensions; {@code field3} is the
 * metric with the {@code sum} and {@code value_count} stats.
 *
 * @param scalingFactorField1 scaling factor of {@code field1}
 * @param scalingFactorField2 scaling factor of {@code field2}
 * @param scalingFactorField3 scaling factor of {@code field3}
 * @return the mapping builder
 * @throws IOException if building the mapping content fails
 */
private XContentBuilder getStarTreeMappingWithScaledFloat(
    double scalingFactorField1,
    double scalingFactorField2,
    double scalingFactorField3
) throws IOException {
    return topMapping(builder -> {
        // composite -> startree -> config
        builder.startObject("composite")
            .startObject("startree")
            .field("type", "star_tree")
            .startObject("config")
            .field("max_leaf_docs", 100);

        builder.startArray("ordered_dimensions");
        for (String dimension : new String[] { "field1", "field2" }) {
            builder.startObject().field("name", dimension).endObject();
        }
        builder.endArray();

        builder.startArray("metrics")
            .startObject()
            .field("name", "field3")
            .startArray("stats")
            .value("sum")
            .value("value_count")
            .endArray()
            .endObject()
            .endArray();

        // close config, startree and composite
        builder.endObject().endObject().endObject();

        String[] fieldNames = { "field1", "field2", "field3" };
        double[] scalingFactors = { scalingFactorField1, scalingFactorField2, scalingFactorField3 };
        builder.startObject("properties");
        for (int i = 0; i < fieldNames.length; i++) {
            builder.startObject(fieldNames[i]).field("type", "scaled_float").field("scaling_factor", scalingFactors[i]).endObject();
        }
        builder.endObject();
    });
}

/**
 * Maps a single {@code scaled_float} field with scaling factor 10.0, checks that the
 * mapping round-trips, then indexes the value 123 and validates the resulting fields.
 */
public void testDefaults() throws Exception {
    final double scalingFactor = 10.0;
    final int rawValue = 123;

    XContentBuilder mapping = fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", scalingFactor));
    DocumentMapper mapper = createDocumentMapper(mapping);
    assertEquals(mapping.toString(), mapper.mappingSource().toString());

    ParsedDocument parsed = mapper.parse(source(b -> b.field("field", rawValue)));
    validateScaledFloatFields(parsed, "field", rawValue, scalingFactor);
}

public void testMissingScalingFactor() {
Exception e = expectThrows(
MapperParsingException.class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import org.opensearch.action.index.IndexResponse;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.action.support.master.AcknowledgedResponse;
import org.opensearch.common.Rounding;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.FeatureFlags;
import org.opensearch.core.common.unit.ByteSizeUnit;
Expand All @@ -23,7 +22,6 @@
import org.opensearch.index.IndexService;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.compositeindex.CompositeIndexSettings;
import org.opensearch.index.compositeindex.datacube.DateDimension;
import org.opensearch.index.compositeindex.datacube.MetricStat;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings;
Expand Down Expand Up @@ -62,7 +60,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.field("name", "timestamp")
.field("name", "numeric_dv_1")
.endObject()
.startObject()
.field("name", "numeric_dv_2")
.endObject()
.startObject()
.field("name", getDim(invalidDim, keywordDim))
Expand All @@ -84,6 +85,14 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric_dv_1")
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric_dv_2")
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric")
.field("type", "integer")
.field("doc_values", false)
Expand Down Expand Up @@ -112,11 +121,7 @@ private static XContentBuilder createMaxDimTestMapping() {
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.field("name", "timestamp")
.startArray("calendar_intervals")
.value("day")
.value("month")
.endArray()
.field("name", "dim4")
.endObject()
.startObject()
.field("name", "dim2")
Expand Down Expand Up @@ -201,7 +206,7 @@ private static XContentBuilder createUpdateTestMapping(boolean changeDim, boolea
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.field("name", "timestamp")
.field("name", "numeric_dv1")
.endObject()
.startObject()
.field("name", changeDim ? "numeric_new" : getDim(false, false))
Expand All @@ -223,6 +228,10 @@ private static XContentBuilder createUpdateTestMapping(boolean changeDim, boolea
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric_dv1")
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric")
.field("type", "integer")
.field("doc_values", false)
Expand Down Expand Up @@ -256,7 +265,7 @@ private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, bo
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.field("name", "timestamp")
.field("name", "numeric_dv2")
.endObject()
.startObject()
.field("name", "numeric_dv")
Expand Down Expand Up @@ -284,6 +293,10 @@ private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, bo
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric_dv2")
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric_dv1")
.field("type", "integer")
.field("doc_values", true)
Expand Down Expand Up @@ -328,15 +341,8 @@ public void testValidCompositeIndex() {
for (CompositeMappedFieldType ft : fts) {
assertTrue(ft instanceof StarTreeMapper.StarTreeFieldType);
StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) ft;
assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField());
assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension);
DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0);
List<Rounding.DateTimeUnit> expectedTimeUnits = Arrays.asList(
Rounding.DateTimeUnit.MINUTES_OF_HOUR,
Rounding.DateTimeUnit.HOUR_OF_DAY
);
assertEquals(expectedTimeUnits, dateDim.getIntervals());
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("numeric_dv_1", starTreeFieldType.getDimensions().get(0).getField());
assertEquals("numeric_dv_2", starTreeFieldType.getDimensions().get(1).getField());
assertEquals(2, starTreeFieldType.getMetrics().size());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());

Expand Down Expand Up @@ -496,15 +502,8 @@ public void testUpdateIndexWhenMappingIsSame() {
for (CompositeMappedFieldType ft : fts) {
assertTrue(ft instanceof StarTreeMapper.StarTreeFieldType);
StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) ft;
assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField());
assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension);
DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0);
List<Rounding.DateTimeUnit> expectedTimeUnits = Arrays.asList(
Rounding.DateTimeUnit.MINUTES_OF_HOUR,
Rounding.DateTimeUnit.HOUR_OF_DAY
);
assertEquals(expectedTimeUnits, dateDim.getIntervals());
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("numeric_dv_1", starTreeFieldType.getDimensions().get(0).getField());
assertEquals("numeric_dv_2", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());

// Assert default metrics
Expand Down Expand Up @@ -570,24 +569,6 @@ public void testMaxMetricsCompositeIndex() {
);
}

/**
 * Creating the test index with the max-dimension mapping while the maximum number of
 * date intervals is set to 1 must fail with a mapping parse error naming the
 * {@code timestamp} dimension.
 */
public void testMaxCalendarIntervalsCompositeIndex() {
    Settings.Builder indexSettings = Settings.builder()
        .put(StarTreeIndexSettings.STAR_TREE_MAX_DATE_INTERVALS_SETTING.getKey(), 1)
        .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true)
        .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB));
    String expectedMessage = "Failed to parse mapping [_doc]: At most [1] calendar intervals are allowed in dimension [timestamp]";

    MapperParsingException ex = expectThrows(
        MapperParsingException.class,
        () -> prepareCreate(TEST_INDEX).setMapping(createMaxDimTestMapping()).setSettings(indexSettings).get()
    );

    assertEquals(expectedMessage, ex.getMessage());
}

public void testUnsupportedDim() {
MapperParsingException ex = expectThrows(
MapperParsingException.class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.common.xcontent.support.XContentMapValues;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings;
import org.opensearch.index.mapper.DateFieldMapper;
import org.opensearch.index.mapper.Mapper;
import org.opensearch.index.mapper.NumberFieldMapper;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -55,11 +53,13 @@ public static Dimension parseAndCreateDimension(
Map<String, Object> dimensionMap,
Mapper.TypeParser.ParserContext c
) {
if (builder instanceof DateFieldMapper.Builder) {
if (builder.getSupportedDataCubeDimensionType().isPresent()
&& builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.DATE)) {
return parseAndCreateDateDimension(name, dimensionMap, c);
} else if (builder instanceof NumberFieldMapper.Builder) {
return new NumericDimension(name);
}
} else if (builder.getSupportedDataCubeDimensionType().isPresent()
&& builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.NUMERIC)) {
return new NumericDimension(name);
}
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with star tree dimension [%s]", name)
);
Expand Down
Loading

0 comments on commit 3f8272b

Please sign in to comment.