Skip to content

Commit

Permalink
Add optimization to fail-fast on annotations (#285)
Browse files Browse the repository at this point in the history
  • Loading branch information
popematt authored Sep 25, 2023
1 parent b8d7cb1 commit a3fabe9
Show file tree
Hide file tree
Showing 5 changed files with 191 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,63 @@ class IonSchemaSystemBuilder private constructor() {
return this
}

/**
* Causes the evaluation of a value against a type definition to return early if the value is
* invalid for the given `annotations` constraint, if one is present. If unset, this option
* defaults to `false`.
*
* ### Rationale
*
* If you use annotations as application-defined type tags on Ion values, this option can benefit
* you in two ways—it will reduce noise in the [Violations] data when calling [Type.validate],
* and it can result in performance improvements, especially in cases where the type definition
* includes constraints such as `element`, `fields` and `ordered_elements` which constrain child
* values of a container.
*
* ### Example
*
* Consider the following type definitions (which are functionally equivalent for Ion Schema 1.0 and 2.0).
* ```
* type::{
* name: foo_struct,
* annotations: closed::required::[foo],
* fields: {
* id: { occurs: required, type: string, regex: "^[0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}$" }
* a: int,
* b: bool,
* }
* }
* type::{
* name: bar_struct,
* annotations: closed::required::[foo],
* fields: {
* id: { occurs: required, type: string, regex: "^[0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}$" }
* b: int,
* c: timestamp,
* }
* }
* type::{
* name: foobar,
* one_of: [foo_struct, bar_struct],
* }
* ```
* With this feature enabled, when validating a value such as `bar::{ id: "qwerty" }` against
* the `foobar` type, the comparison with the `foo_struct` as part of the `one_of` constraint
* will not check all the fields in the struct because the struct does not match the annotations
* of the `foo_struct` type. If you were to validate a value such as `quux::{ id: "qwerty" }`,
* there would be no validation of the fields at all because the annotation `quux` does not match
* the annotations of `foo_struct` or `bar_struct`.
*
* @since 1.7
*/
fun failFastOnInvalidAnnotations(boolean: Boolean): IonSchemaSystemBuilder {
params[IonSchemaSystemImpl.Param.SHORT_CIRCUIT_ON_INVALID_ANNOTATIONS] = boolean
return this
}

/**
* Provides a callback for the IonSchemaSystem to send a warning message about
* things that are not fatal (ie. will not result in an exception being thrown).
* things that are not fatal (i.e. will not result in an exception being thrown).
* Content of the messages may include information about possible errors in
* schemas, and usage of features that may be deprecated in future versions of
* the Ion Schema Language or the `ion-schema-kotlin` library.
Expand All @@ -143,7 +197,7 @@ class IonSchemaSystemBuilder private constructor() {

/**
* Provides a callback for the IonSchemaSystem to send a warning message about
* things that are not fatal (ie. will not result in an exception being thrown).
* things that are not fatal (i.e. will not result in an exception being thrown).
* Content of the messages may include information about possible errors in
* schemas, and usage of features that may be deprecated in future versions of
* the Ion Schema Language or the `ion-schema-kotlin` library.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,5 +185,9 @@ internal class IonSchemaSystemImpl(
// for backwards compatibility with v1.1
// Default is to keep the backward compatible behavior
object ALLOW_TRANSITIVE_IMPORTS : Param<Boolean>(true)
// Introduced in v1.7.0.
// This is a performance improvement that skips all other constraints if the
// annotation doesn't match, regardless of whether the fail-fast was called.
object SHORT_CIRCUIT_ON_INVALID_ANNOTATIONS : Param<Boolean>(false)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import com.amazon.ion.IonValue
import com.amazon.ion.system.IonSystemBuilder
import com.amazon.ionschema.IonSchemaVersion
import com.amazon.ionschema.Violations
import com.amazon.ionschema.internal.IonSchemaSystemImpl.Param.SHORT_CIRCUIT_ON_INVALID_ANNOTATIONS
import com.amazon.ionschema.internal.constraint.ConstraintBase
import com.amazon.ionschema.internal.util.markReadOnly

Expand All @@ -40,6 +41,22 @@ internal class TypeImpl(
private companion object {
private val ION = IonSystemBuilder.standard().build()
private val ANY = ION.newSymbol("any")

/**
* Order in which constraints should be evaluated. Lower is first.
*
* The [validate] function in this class has optional short-circuit logic for the `annotations` constraint, so
* `annotations` gets special treatment and will be evaluated first.
*/
private val CONSTRAINT_EVALUATION_ORDER = mapOf(
"annotations" to -1,
// By default, all constraints are priority 0
)
private val CONSTRAINT_PRIORITY_COMPARATOR = Comparator<Constraint> {
a, b ->
CONSTRAINT_EVALUATION_ORDER.getOrDefault(a.name, 0)
.compareTo(CONSTRAINT_EVALUATION_ORDER.getOrDefault(b.name, 0))
}
}

override val isl = ionStruct.markReadOnly()
Expand All @@ -61,6 +78,8 @@ internal class TypeImpl(
}
}

constraints.sortWith(CONSTRAINT_PRIORITY_COMPARATOR)

if (schema is SchemaImpl_2_0) schema.validateFieldNamesInType(ionStruct)
}

Expand All @@ -84,7 +103,27 @@ internal class TypeImpl(
override fun isValidForBaseType(value: IonValue) = getBaseType().isValidForBaseType(value)

override fun validate(value: IonValue, issues: Violations) {
constraints.forEach {
val constraintIterator = constraints.iterator()

// Handle short-circuit returns for `annotations`, if enabled
if (schema.getSchemaSystem().getParam(SHORT_CIRCUIT_ON_INVALID_ANNOTATIONS)) {
// To avoid adding unnecessary branches for all the constraints that are not "annotations",
// this short circuit logic will run until there is a constraint that is not named "annotations"
// and then fall back to the default logic for the remaining constraints.
while (constraintIterator.hasNext()) {
val c = constraintIterator.next()
if (c.name == "annotations") {
val checkpoint = issues.checkpoint()
c.validate(value, issues)
if (!checkpoint.isValid()) return
} else {
// No more "annotations", so handle normally and then exit the loop
c.validate(value, issues)
break
}
}
}
constraintIterator.forEach {
it.validate(value, issues)
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package com.amazon.ionschema

import com.amazon.ion.IonValue
import com.amazon.ion.system.IonSystemBuilder
import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.Test

class FailFastOnInvalidAnnotationsTest {

companion object {
const val FOO_BAR_BAZ_SCHEMA_ISL_1 = """
type::{
name: symbol_or_int_list,
one_of:[
{
annotations: closed::required::[symbols],
element: symbol,
},
{
annotations: closed::required::[ints],
element: int,
},
{
annotations: closed::required::[],
valid_values: [null],
}
]
}
"""
const val FOO_BAR_BAZ_SCHEMA_ISL_2 = "\$ion_schema_2_0 \n $FOO_BAR_BAZ_SCHEMA_ISL_1"
}
val ion = IonSystemBuilder.standard().build()

val values = ion.loader.load(
"""
symbols::[a, b, c, d, e]
ints::[1, 2, 3, 4, 5]
ints::[a, b, c, d, e] // invalid
null
""".trimIndent()
)

val failFastSystem = IonSchemaSystemBuilder.standard().failFastOnInvalidAnnotations(true).build()
val failSlowSystem = IonSchemaSystemBuilder.standard().failFastOnInvalidAnnotations(false).build()

@Test
fun testAnnotationFastFailImprovesIonSchema1() {
val typeName = "symbol_or_int_list"
benchmark("Warmup 1", failSlowSystem, FOO_BAR_BAZ_SCHEMA_ISL_1, typeName, values)
benchmark("Warmup 2", failFastSystem, FOO_BAR_BAZ_SCHEMA_ISL_1, typeName, values)
val isl1Baseline = benchmark("ISL 1.0 Baseline", failSlowSystem, FOO_BAR_BAZ_SCHEMA_ISL_1, typeName, values)
val isl1FastFail = benchmark("ISL 1.0 Fast Fail", failFastSystem, FOO_BAR_BAZ_SCHEMA_ISL_1, typeName, values)
println("Change: ${(isl1FastFail - isl1Baseline) / isl1Baseline}")

assertTrue(isl1FastFail > isl1Baseline)
}

@Test
fun testAnnotationFastFailImprovesIonSchema2() {
val typeName = "symbol_or_int_list"
benchmark("Warmup 1", failSlowSystem, FOO_BAR_BAZ_SCHEMA_ISL_2, typeName, values)
benchmark("Warmup 2", failFastSystem, FOO_BAR_BAZ_SCHEMA_ISL_2, typeName, values)
val isl2Baseline = benchmark("ISL 2.0 Baseline", failSlowSystem, FOO_BAR_BAZ_SCHEMA_ISL_2, typeName, values)
val isl2FastFail = benchmark("ISL 2.0 Fast Fail", failFastSystem, FOO_BAR_BAZ_SCHEMA_ISL_2, typeName, values)
println("Change: ${(isl2FastFail - isl2Baseline) / isl2Baseline}")

assertTrue(isl2FastFail > isl2Baseline)
}

private fun benchmark(name: String, iss: IonSchemaSystem, schemaString: String, typeName: String, values: List<IonValue>): Double {
val schema = iss.newSchema(schemaString)

val fooType = schema.getType(typeName)!!
val totalOps = 10000
val times = (1..30).map {
val start = System.nanoTime()
for (i in 1..totalOps) {
fooType.validate(values[i % values.size])
}
val end = System.nanoTime()
end - start
}
// ops/s = ops/t_avg * ns/s
val opsPerSecond = totalOps / times.average() * 1000000000
println("$name ops/s = $opsPerSecond")
return opsPerSecond
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ class IonSchemaTests_2_0 : TestFactory by IonSchemaTestsRunner(v2_0)
*/
class IonSchemaTestsRunner(
islVersion: IonSchemaVersion,
systemBuilder: IonSchemaSystemBuilder = IonSchemaSystemBuilder.standard().allowTransitiveImports(false),
systemBuilder: IonSchemaSystemBuilder = IonSchemaSystemBuilder.standard()
.allowTransitiveImports(false)
.failFastOnInvalidAnnotations(true),
additionalFileFilter: (File) -> Boolean = { true },
private val testNameFilter: (String) -> Boolean = { true },
) : TestFactory {
Expand Down

0 comments on commit a3fabe9

Please sign in to comment.