From 4894ad2c067594943cfb03629ccdb5e307e39c83 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 3 May 2024 13:29:25 -0700 Subject: [PATCH] Java CDK: DV2 tests have separate testcases for safe_cast (#36935) --- airbyte-cdk/java/airbyte-cdk/README.md | 1 + .../src/main/resources/version.properties | 2 +- .../BaseSqlGeneratorIntegrationTest.kt | 289 ++++++++++++------ .../dat/sync1_cursorchange_messages.jsonl | 2 +- .../resources/dat/sync1_messages.jsonl | 4 +- .../all_types_v1_inputrecords.jsonl | 3 - .../sqlgenerator/alltypes_inputrecords.jsonl | 5 +- .../alltypes_unsafe_inputrecords.jsonl | 3 - .../cdcupdate_inputrecords_raw.jsonl | 2 - .../incrementaldedup_inputrecords.jsonl | 2 +- .../mixedcasecolumnname_inputrecords.jsonl | 1 + .../safe_cast/all_types_v1_inputrecords.jsonl | 2 + .../safe_cast/alltypes_inputrecords.jsonl | 2 + .../cdcupdate_inputrecords_raw.jsonl | 2 + .../incrementaldedup_inputrecords.jsonl | 2 + 15 files changed, 218 insertions(+), 104 deletions(-) delete mode 100644 airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_unsafe_inputrecords.jsonl create mode 100644 airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/mixedcasecolumnname_inputrecords.jsonl create mode 100644 airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/all_types_v1_inputrecords.jsonl create mode 100644 airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/alltypes_inputrecords.jsonl create mode 100644 airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/cdcupdate_inputrecords_raw.jsonl create mode 100644 airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/incrementaldedup_inputrecords.jsonl diff --git a/airbyte-cdk/java/airbyte-cdk/README.md b/airbyte-cdk/java/airbyte-cdk/README.md index ae11dbbe2f7b..37a290b5a348 100644 --- a/airbyte-cdk/java/airbyte-cdk/README.md +++ b/airbyte-cdk/java/airbyte-cdk/README.md @@ -174,6 +174,7 @@ corresponds to that version. | Version | Date | Pull Request | Subject | | :------ | :--------- | :--------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 0.33.0 | 2024-05-03 | [\#36935](https://github.com/airbytehq/airbyte/pull/36935) | Destinations: Enable non-safe-casting DV2 tests | | 0.32.0 | 2024-05-03 | [\#36929](https://github.com/airbytehq/airbyte/pull/36929) | Destinations: Assorted DV2 changes for mysql | | 0.31.7 | 2024-05-02 | [\#36910](https://github.com/airbytehq/airbyte/pull/36910) | changes for destination-snowflake | | 0.31.6 | 2024-05-02 | [\#37746](https://github.com/airbytehq/airbyte/pull/37746) | debuggability improvements. | diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties b/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties index a730574df844..47feb6ea8a08 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties @@ -1 +1 @@ -version=0.32.0 +version=0.33.0 diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt index ac58a8433c95..6f839fd6c9d7 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt @@ -21,8 +21,12 @@ import java.util.function.Consumer import java.util.function.Function import java.util.stream.Collectors import java.util.stream.Stream +import kotlin.test.assertFails import org.junit.jupiter.api.AfterEach import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.Assertions.assertFalse +import org.junit.jupiter.api.Assumptions.assumeFalse +import org.junit.jupiter.api.Assumptions.assumeTrue import org.junit.jupiter.api.BeforeEach import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertAll @@ -74,7 +78,7 @@ abstract class BaseSqlGeneratorIntegrationTest = mock() protected abstract val sqlGenerator: SqlGenerator - get + protected abstract val supportsSafeCast: Boolean /** * Subclasses should override this method if they need to make changes to the stream ID. For @@ -287,8 +291,8 @@ abstract class BaseSqlGeneratorIntegrationTest = + ArrayList( + BaseTypingDedupingTest.readRecords( + "sqlgenerator/incrementaldedup_inputrecords.jsonl" + ) ) - ) + if (supportsSafeCast) { + inputRecords.addAll( + BaseTypingDedupingTest.readRecords( + "sqlgenerator/safe_cast/incrementaldedup_inputrecords.jsonl" + ) + ) + } + insertRawTableRecords(streamId, inputRecords) executeTypeAndDedupe( generator, @@ -947,12 +1020,20 @@ abstract class BaseSqlGeneratorIntegrationTest = + ArrayList( + BaseTypingDedupingTest.readRecords( + "sqlgenerator/incrementaldedup_inputrecords.jsonl" + ) ) - ) + if (supportsSafeCast) { + inputRecords.addAll( + BaseTypingDedupingTest.readRecords( + "sqlgenerator/safe_cast/incrementaldedup_inputrecords.jsonl" + ) + ) + } + insertRawTableRecords(streamId, inputRecords) executeTypeAndDedupe( generator, @@ -962,7 +1043,12 @@ abstract class BaseSqlGeneratorIntegrationTest = + ArrayList( + BaseTypingDedupingTest.readRecords("sqlgenerator/cdcupdate_inputrecords_raw.jsonl") ) - ) + if (supportsSafeCast) { + inputRecords.addAll( + BaseTypingDedupingTest.readRecords( + "sqlgenerator/safe_cast/cdcupdate_inputrecords_raw.jsonl" + ) + ) + } + insertRawTableRecords(streamId, inputRecords) insertFinalTableRecords( true, streamId, @@ -1109,7 +1201,7 @@ abstract class BaseSqlGeneratorIntegrationTest = + ArrayList( + BaseTypingDedupingTest.readRecords("sqlgenerator/all_types_v1_inputrecords.jsonl") ) - ) + if (supportsSafeCast) { + inputRecords.addAll( + BaseTypingDedupingTest.readRecords( + "sqlgenerator/safe_cast/all_types_v1_inputrecords.jsonl" + ) + ) + } + insertV1RawTableRecords(v1RawTableStreamId, inputRecords) val migration = generator.migrateFromV1toV2( streamId, @@ -1539,9 +1632,16 @@ abstract class BaseSqlGeneratorIntegrationTest @@ -1609,8 +1709,8 @@ abstract class BaseSqlGeneratorIntegrationTest { + val baseRecords = + BaseTypingDedupingTest.readRecords("sqlgenerator/alltypes_inputrecords.jsonl") + return if (includeSafeCastRecords) { + baseRecords + + BaseTypingDedupingTest.readRecords( + "sqlgenerator/safe_cast/alltypes_inputrecords.jsonl" + ) + } else { + baseRecords + } + } } diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/dat/sync1_cursorchange_messages.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/dat/sync1_cursorchange_messages.jsonl index e8262c202587..a91d5f309b6f 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/dat/sync1_cursorchange_messages.jsonl +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/dat/sync1_cursorchange_messages.jsonl @@ -1,4 +1,4 @@ {"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "old_cursor": 0, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}}} {"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "old_cursor": 1, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}}} {"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}}}} -{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}}} +{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": 42, "registration_date": "2023-12-23"}}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/dat/sync1_messages.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/dat/sync1_messages.jsonl index a37e8a603749..458e77d0cdf3 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/dat/sync1_messages.jsonl +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/dat/sync1_messages.jsonl @@ -8,7 +8,7 @@ {"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}}} // Emit a record with no _ab_cdc_deleted_at field. CDC sources typically emit an explicit null, but we should handle both cases. {"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}}} -// Emit a record with an invalid age & address nulled at source. -{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}, "meta": {"changes": [{"field": "address", "change": "NULLED", "reason": "SOURCE_RETRIEVAL_ERROR"}]}}} +// Emit a record with address nulled at source. +{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": 42, "registration_date": "2023-12-23"}, "meta": {"changes": [{"field": "address", "change": "NULLED", "reason": "SOURCE_RETRIEVAL_ERROR"}]}}} // Emit a record with interesting characters in one of the values. {"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 3, "id2": 200, "updated_at": "2000-01-01T00:04:00Z", "name": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"}}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/all_types_v1_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/all_types_v1_inputrecords.jsonl index e2cde49ad980..d54194eadac2 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/all_types_v1_inputrecords.jsonl +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/all_types_v1_inputrecords.jsonl @@ -1,7 +1,4 @@ {"_airbyte_ab_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_emitted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "foo", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} {"_airbyte_ab_id": "53ce75a5-5bcc-47a3-b45c-96c2015cfe35", "_airbyte_emitted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": null, "struct": null, "string": null, "number": null, "integer": null, "boolean": null, "timestamp_with_timezone": null, "timestamp_without_timezone": null, "time_with_timezone": null, "time_without_timezone": null, "date": null, "unknown": null}} {"_airbyte_ab_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fbe", "_airbyte_emitted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 3, "id2": 100, "updated_at": "2023-01-01T01:00:00Z"}} -// Note that array and struct have invalid values ({} and [] respectively). -{"_airbyte_ab_id": "84242b60-3a34-4531-ad75-a26702960a9a", "_airbyte_emitted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": {}, "struct": [], "string": null, "number": "foo", "integer": "bar", "boolean": "fizz", "timestamp_with_timezone": {}, "timestamp_without_timezone": {}, "time_with_timezone": {}, "time_without_timezone": {}, "date": "airbyte", "unknown": null}} {"_airbyte_ab_id": "a4a783b5-7729-4d0b-b659-48ceb08713f1", "_airbyte_emitted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 5, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "number": 67.174118, "struct": {"nested_number": 67.174118}, "array": [67.174118], "unknown": 67.174118}} -{"_airbyte_ab_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fce", "_airbyte_emitted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 6, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "IamACaseSensitiveColumnName": "Case senstive value"}} \ No newline at end of file diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_inputrecords.jsonl index 8f8ced8a26a1..b965f22e8bad 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_inputrecords.jsonl +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_inputrecords.jsonl @@ -1,7 +1,4 @@ {"_airbyte_raw_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "foo", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} {"_airbyte_raw_id": "53ce75a5-5bcc-47a3-b45c-96c2015cfe35", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": null, "struct": null, "string": null, "number": null, "integer": null, "boolean": null, "timestamp_with_timezone": null, "timestamp_without_timezone": null, "time_with_timezone": null, "time_without_timezone": null, "date": null, "unknown": null}} -{"_airbyte_raw_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fbe", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 3, "id2": 100, "updated_at": "2023-01-01T01:00:00Z"}} -// Note that array and struct have invalid values ({} and [] respectively). -{"_airbyte_raw_id": "84242b60-3a34-4531-ad75-a26702960a9a", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": {}, "struct": [], "string": null, "number": "foo", "integer": "bar", "boolean": "fizz", "timestamp_with_timezone": {}, "timestamp_without_timezone": {}, "time_with_timezone": {}, "time_without_timezone": {}, "date": "airbyte", "unknown": null}, "_airbyte_meta": {"changes": [{"field": "string", "change": "NULLED", "reason": "SOURCE_SERIALIZATION_ERROR"}]}} +{"_airbyte_raw_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fbe", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 3, "id2": 100, "updated_at": "2023-01-01T01:00:00Z"}, "_airbyte_meta": {"changes": [{"field": "string", "change": "NULLED", "reason": "SOURCE_SERIALIZATION_ERROR"}]}} {"_airbyte_raw_id": "a4a783b5-7729-4d0b-b659-48ceb08713f1", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 5, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "number": 67.174118, "struct": {"nested_number": 67.174118}, "array": [67.174118], "unknown": 67.174118}} -{"_airbyte_raw_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fce", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 6, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "IamACaseSensitiveColumnName": "Case senstive value"}} \ No newline at end of file diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_unsafe_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_unsafe_inputrecords.jsonl deleted file mode 100644 index 55a509408d14..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_unsafe_inputrecords.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -// this is a strict subset of the alltypes_inputrecords file. All these records have valid values, i.e. can be processed with unsafe casting. -{"_airbyte_raw_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "foo", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} -{"_airbyte_raw_id": "53ce75a5-5bcc-47a3-b45c-96c2015cfe35", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": null, "struct": null, "string": null, "number": null, "integer": null, "boolean": null, "timestamp_with_timezone": null, "timestamp_without_timezone": null, "time_with_timezone": null, "time_without_timezone": null, "date": null, "unknown": null}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/cdcupdate_inputrecords_raw.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/cdcupdate_inputrecords_raw.jsonl index e5752b06c025..2de80d327906 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/cdcupdate_inputrecords_raw.jsonl +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/cdcupdate_inputrecords_raw.jsonl @@ -12,5 +12,3 @@ {"_airbyte_raw_id": "4d8674a5-eb6e-41ca-a310-69c64c88d101", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 0, "id2": 100, "updated_at": "2023-01-01T05:00:00Z", "_ab_cdc_deleted_at": null, "string": "zombie_returned"}} // CDC generally outputs an explicit null for deleted_at, but verify that we can also handle the case where deleted_at is unset. {"_airbyte_raw_id": "f0b59e49-8c74-4101-9f14-cb4d1193fd5a", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 4, "id2": 100, "updated_at": "2023-01-01T06:00:00Z", "string": "charlie"}} -// Invalid values in _ab_cdc_deleted_at result in the record NOT being deleted. This behavior is up for debate, but it's an extreme edge case so not a high priority. -{"_airbyte_raw_id": "d4e1d989-c115-403c-9e68-5d320e6376bb", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 5, "id2": 100, "updated_at": "2023-01-01T07:00:00Z", "_ab_cdc_deleted_at": {}, "string": "david1"}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/incrementaldedup_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/incrementaldedup_inputrecords.jsonl index 1d850d9dc74b..b2e0d7925650 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/incrementaldedup_inputrecords.jsonl +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/incrementaldedup_inputrecords.jsonl @@ -1,3 +1,3 @@ {"_airbyte_raw_id": "d7b81af0-01da-4846-a650-cc398986bc99", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "string": "Alice", "struct": {"city": "San Francisco", "state": "CA"}, "integer": 42}} {"_airbyte_raw_id": "80c99b54-54b4-43bd-b51b-1f67dafa2c52", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}} -{"_airbyte_raw_id": "ad690bfb-c2c2-4172-bd73-a16c86ccbb67", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}} +{"_airbyte_raw_id": "ad690bfb-c2c2-4172-bd73-a16c86ccbb67", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": 126}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/mixedcasecolumnname_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/mixedcasecolumnname_inputrecords.jsonl new file mode 100644 index 000000000000..c3ba7ea1519e --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/mixedcasecolumnname_inputrecords.jsonl @@ -0,0 +1 @@ +{"_airbyte_raw_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fce", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 6, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "IamACaseSensitiveColumnName": "Case senstive value"}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/all_types_v1_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/all_types_v1_inputrecords.jsonl new file mode 100644 index 000000000000..b2f0357034ae --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/all_types_v1_inputrecords.jsonl @@ -0,0 +1,2 @@ +// Note that array and struct have invalid values ({} and [] respectively). +{"_airbyte_ab_id": "84242b60-3a34-4531-ad75-a26702960a9a", "_airbyte_emitted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": {}, "struct": [], "string": null, "number": "foo", "integer": "bar", "boolean": "fizz", "timestamp_with_timezone": {}, "timestamp_without_timezone": {}, "time_with_timezone": {}, "time_without_timezone": {}, "date": "airbyte", "unknown": null}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/alltypes_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/alltypes_inputrecords.jsonl new file mode 100644 index 000000000000..2eb8fae2374d --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/alltypes_inputrecords.jsonl @@ -0,0 +1,2 @@ +// Note that array and struct have invalid values ({} and [] respectively). +{"_airbyte_raw_id": "84242b60-3a34-4531-ad75-a26702960a9a", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": {}, "struct": [], "string": null, "number": "foo", "integer": "bar", "boolean": "fizz", "timestamp_with_timezone": {}, "timestamp_without_timezone": {}, "time_with_timezone": {}, "time_without_timezone": {}, "date": "airbyte", "unknown": null}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/cdcupdate_inputrecords_raw.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/cdcupdate_inputrecords_raw.jsonl new file mode 100644 index 000000000000..31439993f8e1 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/cdcupdate_inputrecords_raw.jsonl @@ -0,0 +1,2 @@ +// Invalid values in _ab_cdc_deleted_at result in the record NOT being deleted. This behavior is up for debate, but it's an extreme edge case so not a high priority. +{"_airbyte_raw_id": "d4e1d989-c115-403c-9e68-5d320e6376bb", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 5, "id2": 100, "updated_at": "2023-01-01T07:00:00Z", "_ab_cdc_deleted_at": {}, "string": "david1"}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/incrementaldedup_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/incrementaldedup_inputrecords.jsonl new file mode 100644 index 000000000000..b67035c060f2 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/incrementaldedup_inputrecords.jsonl @@ -0,0 +1,2 @@ +// This record verifies that we can update an existing record, with a new record where one column has an invalid value. +{"_airbyte_raw_id": "b9ac9f01-abc1-4e7c-89e5-eac9223d5726", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T03:00:01Z", "string": "Bob", "integer": "oops"}}