diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml new file mode 100644 index 000000000..a8497c94d --- /dev/null +++ b/.github/workflows/e2e.yml @@ -0,0 +1,105 @@ +# Copyright © 2023 Cask Data, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +# This workflow will build a Java project with Maven +# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven +# Note: Any changes to this workflow would be used only after merging into develop +name: Build e2e tests + +on: + push: + branches: [ develop ] + pull_request: + branches: [ develop ] + types: [ opened, synchronize, reopened, labeled ] + workflow_dispatch: + +jobs: + build: + runs-on: k8s-runner-e2e + # We allow builds: + # 1) When triggered manually + # 2) When it's a merge into a branch + # 3) For PRs that are labeled as build and + # - It's a code change + # - A build label was just added + # A bit complex, but prevents builds when other labels are manipulated + if: > + github.event_name == 'workflow_dispatch' + || github.event_name == 'push' + || (contains(github.event.pull_request.labels.*.name, 'build') + && (github.event.action != 'labeled' || github.event.label.name == 'build') + ) + strategy: + matrix: + module: [wrangler-transform] + fail-fast: false + + steps: + # Pinned 1.0.0 version + - uses: actions/checkout@v3 + with: + path: plugin + submodules: 'recursive' + ref: ${{ github.event.workflow_run.head_sha }} + + - uses: dorny/paths-filter@b2feaf19c27470162a626bd6fa8438ae5b263721 + if: github.event_name != 'workflow_dispatch' && github.event_name != 'push' + id: filter + with: + working-directory: plugin + filters: | + e2e-test: + - '${{ matrix.module }}/**/e2e-test/**' + + - name: Checkout e2e test repo + uses: actions/checkout@v3 + with: + repository: cdapio/cdap-e2e-tests + path: e2e + + - name: Cache + uses: actions/cache@v3 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ github.workflow }}-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven-${{ github.workflow }} + + - name: Run required e2e tests + if: github.event_name != 'workflow_dispatch' && github.event_name != 'push' && steps.filter.outputs.e2e-test == 'false' + run: python3 e2e/src/main/scripts/run_e2e_test.py --module ${{ matrix.module }} --testRunner TestRunnerRequired.java + + - name: Run all e2e tests + if: github.event_name == 'workflow_dispatch' || github.event_name == 'push' || steps.filter.outputs.e2e-test == 'true' + run: python3 e2e/src/main/scripts/run_e2e_test.py --module ${{ matrix.module }} + + - name: Upload report + uses: actions/upload-artifact@v3 + if: always() + with: + name: Cucumber report - ${{ matrix.module }} + path: ./**/target/cucumber-reports + + - name: Upload debug files + uses: actions/upload-artifact@v3 + if: always() + with: + name: Debug files - ${{ matrix.module }} + path: ./**/target/e2e-debug + + - name: Upload files to GCS + uses: google-github-actions/upload-cloud-storage@v0 + if: always() + with: + path: ./plugin + destination: e2e-tests-cucumber-reports/${{ github.event.repository.name }}/${{ github.ref }} + glob: '**/target/cucumber-reports/**' diff --git a/pom.xml b/pom.xml index 0907f6871..b43b5c99d 100644 --- a/pom.xml +++ b/pom.xml @@ -100,7 +100,7 @@ 1.106.0 2.6.2 2.0.0 - 20.0 + 31.0.1-jre 2.4.0 2.2 2.2.4 @@ -122,6 +122,7 @@ 1.11 1.7.15 0.4 + ${project.basedir}/src/test/java/ @@ -172,6 +173,7 @@ + ${testSourceLocation} @@ -186,7 +188,7 @@ org.apache.felix maven-bundle-plugin - 3.3.0 + 3.5.0 true @@ -397,7 +399,6 @@ releases - org.sonatype.plugins nexus-staging-maven-plugin @@ -429,6 +430,135 @@ + + e2e-tests + + src/e2e-test/java + TestRunner.java + + + + + src/e2e-test/resources + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.18.1 + + true + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.0.0 + + + org.apache.maven.surefire + surefire-junit47 + 3.0.0 + + + + + ${TEST_RUNNER} + + + classes + 2 + 2 + true + + + + ${GOOGLE_APPLICATION_CREDENTIALS} + + + ${SERVICE_ACCOUNT_TYPE} + + + ${SERVICE_ACCOUNT_FILE_PATH} + + + ${SERVICE_ACCOUNT_JSON} + + + + + + + integration-test + verify + + + + + + + net.masterthought + maven-cucumber-reporting + 5.5.0 + + + + execution + verify + + generate + + + Cucumber Reports + target/cucumber-reports/advanced-reports + 1 + false + ${project.build.directory}/cucumber-reports + + **/*.json + + ${project.build.directory}/cucumber-reports + true + + + + + + + + + + com.google.guava + guava + ${guava.version} + + + + + + + org.slf4j + slf4j-api + 1.7.15 + + + + io.cdap.tests.e2e + cdap-e2e-framework + 0.3.0-SNAPSHOT + test + + + + ch.qos.logback + logback-classic + 1.2.8 + runtime + + + + - diff --git a/wrangler-transform/pom.xml b/wrangler-transform/pom.xml index c99f8e3d8..b09712c0d 100644 --- a/wrangler-transform/pom.xml +++ b/wrangler-transform/pom.xml @@ -215,5 +215,4 @@ - diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature new file mode 100644 index 000000000..fa59cb54c --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature @@ -0,0 +1,43 @@ +# Copyright © 2023 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +@Wrangler +Feature: Wrangler - Run time scenarios + + @BQ_SOURCE_CSV_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse csv directive + Given Open Datafusion Project to configure pipeline + Then Click on the Plus Green Button to import the pipelines + Then Select the file for importing the pipeline for the plugin "Directive_parse_csv" + Then Navigate to the properties page of plugin: "BigQueryTable" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Replace input plugin property: "table" with value: "bqSourceTable" + Then Click on the Get Schema button + Then Click on the Validate button + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "BigQuery2" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "table" with value: "bqTargetTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Click on the Validate button + Then Close the Plugin Properties page + Then Rename the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_csv" diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature new file mode 100644 index 000000000..2dd8fd0da --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature @@ -0,0 +1,43 @@ +# Copyright © 2023 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +@Wrangler +Feature: Wrangler - Run time scenarios + + @BQ_SOURCE_FXDLEN_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse fixedlength directive + Given Open Datafusion Project to configure pipeline + Then Click on the Plus Green Button to import the pipelines + Then Select the file for importing the pipeline for the plugin "Directive_parse_Fixed_Length" + Then Navigate to the properties page of plugin: "BigQueryTable" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Replace input plugin property: "table" with value: "bqSourceTable" + Then Click on the Get Schema button + Then Click on the Validate button + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "BigQuery2" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "table" with value: "bqTargetTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Click on the Validate button + Then Close the Plugin Properties page + Then Rename the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_FixedLength" diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature new file mode 100644 index 000000000..3f7aae3aa --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature @@ -0,0 +1,43 @@ +# Copyright © 2023 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +@Wrangler +Feature: Wrangler - Run time scenarios + + @BQ_SOURCE_HL7_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse hl7 directive + Given Open Datafusion Project to configure pipeline + Then Click on the Plus Green Button to import the pipelines + Then Select the file for importing the pipeline for the plugin "Directive_parse_hl7" + Then Navigate to the properties page of plugin: "BigQueryTable" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Replace input plugin property: "table" with value: "bqSourceTable" + Then Click on the Get Schema button + Then Click on the Validate button + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "BigQuery2" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "table" with value: "bqTargetTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Click on the Validate button + Then Close the Plugin Properties page + Then Rename the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_hl7" diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsJson.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsJson.feature new file mode 100644 index 000000000..c1833f2b7 --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsJson.feature @@ -0,0 +1,43 @@ +# Copyright © 2023 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +@Wrangler +Feature: Wrangler - Run time scenarios + + @BQ_SOURCE_JSON_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse json directive + Given Open Datafusion Project to configure pipeline + Then Click on the Plus Green Button to import the pipelines + Then Select the file for importing the pipeline for the plugin "Directive_parse_json" + Then Navigate to the properties page of plugin: "BigQueryTable" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Replace input plugin property: "table" with value: "bqSourceTable" + Then Click on the Get Schema button + Then Click on the Validate button + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "BigQuery2" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "table" with value: "bqTargetTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Click on the Validate button + Then Close the Plugin Properties page + Then Rename the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_json" diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java new file mode 100644 index 000000000..bd9dbba90 --- /dev/null +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java @@ -0,0 +1,142 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.plugin.common.stepsdesign; + +import com.google.cloud.bigquery.BigQueryException; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.StorageException; +import io.cdap.e2e.utils.BigQueryClient; +import io.cdap.e2e.utils.PluginPropertyUtils; +import io.cdap.e2e.utils.StorageClient; +import io.cucumber.java.After; +import io.cucumber.java.Before; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.StringUtils; +import org.junit.Assert; +import stepsdesign.BeforeActions; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.sql.SQLException; +import java.util.NoSuchElementException; +import java.util.UUID; + +import static io.cdap.e2e.pages.locators.CdfGCSLocators.filePath; + +/** + * Setup BQ for Wrangler tests. + */ +public class TestSetupHooks { + + @Before(order = 1, value = "@BQ_SINK_TEST") + public static void setTempTargetBQTableName() { + String bqTargetTableName = "E2E_TARGET_" + UUID.randomUUID().toString().replaceAll("-", "_"); + PluginPropertyUtils.addPluginProp("bqTargetTable", bqTargetTableName); + BeforeActions.scenario.write("BQ Target table name - " + bqTargetTableName); + } + + @After(order = 1, value = "@BQ_SINK_TEST") + public static void deleteTempTargetBQTable() throws IOException, InterruptedException { + String bqTargetTableName = PluginPropertyUtils.pluginProp("bqTargetTable"); + try { + BigQueryClient.dropBqQuery(bqTargetTableName); + BeforeActions.scenario.write("BQ Target table - " + bqTargetTableName + " deleted successfully"); + PluginPropertyUtils.removePluginProp("bqTargetTable"); + } catch (BigQueryException e) { + if (e.getMessage().contains("Not found: Table")) { + BeforeActions.scenario.write("BQ Target Table " + bqTargetTableName + " does not exist"); + } else { + Assert.fail(e.getMessage()); + } + } + } + + /** + * Create BigQuery table. + */ + @Before(order = 1, value = "@BQ_SOURCE_CSV_TEST") + public static void createTempSourceBQTable() throws IOException, InterruptedException { + createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFileCsv"), + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileCsv")); + } + @Before(order = 1, value = "@BQ_SOURCE_JSON_TEST") + public static void createTempSourceBQTableJson() throws IOException, InterruptedException { + createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFileJson"), + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileJson")); + } + @Before(order = 1, value = "@BQ_SOURCE_FXDLEN_TEST") + public static void createTempSourceBQTableFxdLen() throws IOException, InterruptedException { + createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileFxdLen"), + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileFxdLen")); + } + @Before(order = 1, value = "@BQ_SOURCE_HL7_TEST") + public static void createTempSourceBQTableHl7() throws IOException, InterruptedException { + createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileHl7"), + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileHl7")); + } + + @After(order = 1, value = "@BQ_SOURCE_TEST") + public static void deleteTempSourceBQTable() throws IOException, InterruptedException { + String bqSourceTable = PluginPropertyUtils.pluginProp("bqSourceTable"); + BigQueryClient.dropBqQuery(bqSourceTable); + BeforeActions.scenario.write("BQ source Table " + bqSourceTable + " deleted successfully"); + PluginPropertyUtils.removePluginProp("bqSourceTable"); + } + + private static void createSourceBQTableWithQueries(String bqCreateTableQueryFile, String bqInsertDataQueryFile) + throws IOException, InterruptedException { + String bqSourceTable = "E2E_SOURCE_" + UUID.randomUUID().toString().substring(0, 5).replaceAll("-", + "_"); + + String createTableQuery = StringUtils.EMPTY; + try { + createTableQuery = new String(Files.readAllBytes(Paths.get(TestSetupHooks.class.getResource + ("/" + bqCreateTableQueryFile).toURI())) + , StandardCharsets.UTF_8); + createTableQuery = createTableQuery.replace("DATASET", PluginPropertyUtils.pluginProp("dataset")) + .replace("TABLE_NAME", bqSourceTable); + } catch (Exception e) { + BeforeActions.scenario.write("Exception in reading " + bqCreateTableQueryFile + " - " + e.getMessage()); + Assert.fail("Exception in BigQuery testdata prerequisite setup " + + "- error in reading create table query file " + e.getMessage()); + } + + String insertDataQuery = StringUtils.EMPTY; + try { + insertDataQuery = new String(Files.readAllBytes(Paths.get(TestSetupHooks.class.getResource + ("/" + bqInsertDataQueryFile).toURI())) + , StandardCharsets.UTF_8); + insertDataQuery = insertDataQuery.replace("DATASET", PluginPropertyUtils.pluginProp("dataset")) + .replace("TABLE_NAME", bqSourceTable); + } catch (Exception e) { + BeforeActions.scenario.write("Exception in reading " + bqInsertDataQueryFile + " - " + e.getMessage()); + Assert.fail("Exception in BigQuery testdata prerequisite setup " + + "- error in reading insert data query file " + e.getMessage()); + } + BigQueryClient.getSoleQueryResult(createTableQuery); + try { + BigQueryClient.getSoleQueryResult(insertDataQuery); + } catch (NoSuchElementException e) { + // Insert query does not return any record. + // Iterator on TableResult values in getSoleQueryResult method throws NoSuchElementException + } + PluginPropertyUtils.addPluginProp("bqSourceTable", bqSourceTable); + BeforeActions.scenario.write("BQ Source Table " + bqSourceTable + " created successfully"); + } +} diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/package-info.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/package-info.java new file mode 100644 index 000000000..63f8efabc --- /dev/null +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Package contains the stepDesign for common features. + */ +package io.cdap.plugin.common.stepsdesign; diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/actions/ValidationHelper.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/actions/ValidationHelper.java new file mode 100644 index 000000000..f35d6d311 --- /dev/null +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/actions/ValidationHelper.java @@ -0,0 +1,101 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package io.cdap.plugin.wrangler.actions; + +import com.esotericsoftware.minlog.Log; +import com.google.cloud.bigquery.FieldValueList; +import com.google.cloud.bigquery.TableResult; +import com.google.gson.Gson; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import io.cdap.e2e.utils.BigQueryClient; +import io.cdap.e2e.utils.PluginPropertyUtils; +import io.cucumber.core.logging.Logger; +import io.cucumber.core.logging.LoggerFactory; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; + +/** + * Validation Helper. + */ +public class ValidationHelper { + + private static final Logger LOG = LoggerFactory.getLogger(ValidationHelper.class); + static Gson gson = new Gson(); + public static boolean validateActualDataToExpectedData(String table, String fileName) throws IOException, + InterruptedException, URISyntaxException { + Map bigQueryMap = new HashMap<>(); + Map fileMap = new HashMap<>(); + Path importExpectedFile = Paths.get(ValidationHelper.class.getResource("/" + fileName).toURI()); + + getBigQueryTableData(table, bigQueryMap); + getFileData(importExpectedFile.toString(), fileMap); + + boolean isMatched = bigQueryMap.equals(fileMap); + + return isMatched; + } + + public static void getFileData(String fileName, Map fileMap) { + try (BufferedReader br = new BufferedReader(new FileReader(fileName))) { + String line; + while ((line = br.readLine()) != null) { + JsonObject json = gson.fromJson(line, JsonObject.class); + if (json.has("id")) { // Check if the JSON object has the "id" key + JsonElement idElement = json.get("id"); + if (idElement.isJsonPrimitive()) { + String idKey = idElement.getAsString(); + fileMap.put(idKey, json); + } else { + Log.error("ID key not found"); + } + } + } + } catch (IOException e) { + System.err.println("Error reading the file: " + e.getMessage()); + } + } + + private static void getBigQueryTableData(String targetTable, Map bigQueryMap) + throws IOException, InterruptedException { + String dataset = PluginPropertyUtils.pluginProp("dataset"); + String projectId = PluginPropertyUtils.pluginProp("projectId"); + String selectQuery = "SELECT TO_JSON(t) FROM `" + projectId + "." + dataset + "." + targetTable + "` AS t"; + TableResult result = BigQueryClient.getQueryResult(selectQuery); + + for (FieldValueList row : result.iterateAll()) { + JsonObject json = gson.fromJson(row.get(0).getStringValue(), JsonObject.class); + if (json.has("id")) { // Check if the JSON object has the "id" key + JsonElement idElement = json.get("id"); + if (idElement.isJsonPrimitive()) { + String idKey = idElement.getAsString(); + bigQueryMap.put(idKey, json); + } else { + LOG.error("Data Mismatched"); + } + } else { + LOG.error("ID Key not found in JSON object"); + } + } + } +} diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/actions/package-info.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/actions/package-info.java new file mode 100644 index 000000000..4d0f2be85 --- /dev/null +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/actions/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Package contains the actions for Wrangler features. + */ +package io.cdap.plugin.wrangler.actions; diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunner.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunner.java new file mode 100644 index 000000000..87b0d1aec --- /dev/null +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunner.java @@ -0,0 +1,36 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package io.cdap.plugin.wrangler.runners; + +import io.cucumber.junit.Cucumber; +import io.cucumber.junit.CucumberOptions; +import org.junit.runner.RunWith; + +/** + * Test Runner to execute Wrangler plugin test cases. + */ +@RunWith(Cucumber.class) +@CucumberOptions( + features = {"src/e2e-test/features"}, + glue = {"stepsdesign", "io.cdap.plugin.common.stepsdesign", "io.cdap.plugin.wrangler.stepsdesign", + "io.cdap.plugin.wrangler.locators"}, + tags = {"@Wrangler"}, + plugin = {"pretty", "html:target/cucumber-html-report/wrangler-required", + "json:target/cucumber-reports/cucumber-wrangler-required.json", + "junit:target/cucumber-reports/cucumber-wrangler-required.xml"} +) + public class TestRunner { +} diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunnerRequired.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunnerRequired.java new file mode 100644 index 000000000..868e067ff --- /dev/null +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunnerRequired.java @@ -0,0 +1,35 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package io.cdap.plugin.wrangler.runners; + +import io.cucumber.junit.Cucumber; +import io.cucumber.junit.CucumberOptions; +import org.junit.runner.RunWith; + +/** + * Test Runner to execute Wrangler plugin test cases. + */ +@RunWith(Cucumber.class) +@CucumberOptions( + features = {"src/e2e-test/features"}, + glue = {"stepsdesign", "io.cdap.plugin.common.stepsdesign", "io.cdap.plugin.wrangler.stepsdesign"}, + tags = {"@Wrangler_Required"}, + plugin = {"pretty", "html:target/cucumber-html-report/wrangler-required", + "json:target/cucumber-reports/cucumber-wrangler-required.json", + "junit:target/cucumber-reports/cucumber-wrangler-required.xml"} +) +public class TestRunnerRequired { +} diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/package-info.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/package-info.java new file mode 100644 index 000000000..b90a7504c --- /dev/null +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Package contains the runners for Wrangler features. + */ +package io.cdap.plugin.wrangler.runners; diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/stepsdesign/Wrangler.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/stepsdesign/Wrangler.java new file mode 100644 index 000000000..9d51ea34c --- /dev/null +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/stepsdesign/Wrangler.java @@ -0,0 +1,41 @@ +package io.cdap.plugin.wrangler.stepsdesign; +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import io.cdap.e2e.utils.CdfHelper; +import io.cdap.e2e.utils.PluginPropertyUtils; +import io.cdap.plugin.wrangler.actions.ValidationHelper; +import io.cucumber.java.en.Then; +import org.junit.Assert; + +import java.io.IOException; +import java.net.URISyntaxException; + +/** + * Step Design to execute Wrangler plugin test cases. + */ + +public class Wrangler implements CdfHelper { + + @Then("Validate The Data From BQ To BQ With Actual And Expected File for: {string}") + public void validateTheDataFromBQToBQWithActualAndExpectedFileFor(String expectedFile) throws IOException, + InterruptedException, URISyntaxException { + boolean recordsMatched = ValidationHelper.validateActualDataToExpectedData( + PluginPropertyUtils.pluginProp("bqTargetTable"), + PluginPropertyUtils.pluginProp(expectedFile)); + Assert.assertTrue("Value of records in actual and expected file is equal", recordsMatched); + } +} diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/stepsdesign/package-info.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/stepsdesign/package-info.java new file mode 100644 index 000000000..3e212c76c --- /dev/null +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/stepsdesign/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Package contains the stepDesign for Wrangler features. + */ +package io.cdap.plugin.wrangler.stepsdesign; diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_csv b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_csv new file mode 100644 index 000000000..048583abb --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_csv @@ -0,0 +1,6 @@ +{"abc":"L#89,adam,joy,3","body_2":"adam","body_3":"joy","body_4":3,"body_q":"40","id":"89"} +{"abc":"D#34,aman,,2","body_2":"aman","body_3":"shubh","body_4":2,"body_q":"20","id":"34"} +{"abc":"C#12,ronnie,root,1","body_2":"ronnie","body_3":"root","body_4":1,"body_q":"20","id":"12"} +{"abc":"C#12,ronnie,root,1","body_2":"ronnie","body_3":"root","body_4":1,"body_q":"20","id":"C"} +{"abc":"D#34,aman,,2","body_2":"aman","body_3":"shubh","body_4":2,"body_q":"20","id":"D"} +{"abc":"L#89,adam,joy,3","body_2":"adam","body_3":"joy","body_4":3,"body_q":"40","id":"L"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlength b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlength new file mode 100644 index 000000000..33010a877 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlength @@ -0,0 +1,2 @@ +{"Url":"http://example.com:80/docs/books/tutorial/index.html?name=networking#DOWNLOADING","fixedlength":"21 10 ABCXYZ","fixedlength_1":"21","fixedlength_3":" ABC","fixedlength_4":"XYZ","fixedlength_encode_base32":"GIYSAIBRGAQCAQKCINMFSWQ=","fixedlength_encode_base32_decode_base32":"21 10 ABCXYZ","id":" 10","url_authority":"example.com:80","url_filename":"/docs/books/tutorial/index.html?name=networking","url_host":"example.com","url_path":"/docs/books/tutorial/index.html","url_port":80,"url_protocol":"http","url_query":"name=networking","url_query_1":"name","url_query_2":"networking"} +{"Url":"http://geeks.com:80/docs/chair/tutorial/index.html?name=networking#DOWNLOADING","fixedlength":"19 13 ABCXYZ","fixedlength_1":"19","fixedlength_3":" ABC","fixedlength_4":"XYZ","fixedlength_encode_base32":"GE4SAIBRGMQCAQKCINMFSWQ=","fixedlength_encode_base32_decode_base32":"19 13 ABCXYZ","id":" 13","url_authority":"geeks.com:80","url_filename":"/docs/chair/tutorial/index.html?name=networking","url_host":"geeks.com","url_path":"/docs/chair/tutorial/index.html","url_port":80,"url_protocol":"http","url_query":"name=networking","url_query_1":"name","url_query_2":"networking"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_hl7 b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_hl7 new file mode 100644 index 000000000..3d1429529 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_hl7 @@ -0,0 +1,2 @@ +{"Body":"s��\u0011y�X��\u0006�H���","Body_hl7_MSH_9_1":"ALM","address":"test","id":"3"} +{"Body":"F<��\u001c����#J��^�:","Body_hl7_MSH_9_1":"BLM","address":"address2","id":"4"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_json b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_json new file mode 100644 index 000000000..b8cac65cb --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_json @@ -0,0 +1,3 @@ +{"Body":"hello abc","copied":{"first":"Root","last":"joy"},"desc":"nick, hello abc","id":22,"json_age":"{\"json_id\":22,\"copied\":{\"first\":\"Root\",\"last\":\"joy\"},\"json_age\":1,\"json_name\":{\"first\":\"Root\",\"last\":\"joy\"},\"json_pet\":\"testing\",\"json_id_json_name\":\"22,{\\\"first\\\":\\\"Root\\\",\\\"last\\\":\\\"joy\\\"}\",\"body\":\"hello abc\",\"desc\":\"nick, hello abc\"}","json_id_json_name":"22,{\"first\":\"Root\",\"last\":\"joy\"}","json_name":{"first":"Root","last":"joy"},"json_pet":"testing"} +{"Body":"hello def","copied":{"first":"dded","last":"share"},"desc":"hello, hello def","id":23,"json_age":"{\"json_id\":23,\"copied\":{\"first\":\"dded\",\"last\":\"share\"},\"json_age\":2,\"json_name\":{\"first\":\"dded\",\"last\":\"share\"},\"json_pet\":\"testing\",\"json_id_json_name\":\"23,{\\\"first\\\":\\\"dded\\\",\\\"last\\\":\\\"share\\\"}\",\"body\":\"hello def\",\"desc\":\"hello, hello def\"}","json_id_json_name":"23,{\"first\":\"dded\",\"last\":\"share\"}","json_name":{"first":"dded","last":"share"},"json_pet":"testing"} +{"Body":"hello ghi","copied":{"first":"Root","last":"Joltie"},"desc":"doms, hello ghi","id":24,"json_age":"{\"json_id\":24,\"copied\":{\"first\":\"Root\",\"last\":\"Joltie\"},\"json_age\":3,\"json_name\":{\"first\":\"Root\",\"last\":\"Joltie\"},\"json_pet\":\"testing\",\"json_id_json_name\":\"24,{\\\"first\\\":\\\"Root\\\",\\\"last\\\":\\\"Joltie\\\"}\",\"body\":\"hello ghi\",\"desc\":\"doms, hello ghi\"}","json_id_json_name":"24,{\"first\":\"Root\",\"last\":\"Joltie\"}","json_name":{"first":"Root","last":"Joltie"},"json_pet":"testing"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryCsv.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryCsv.txt new file mode 100644 index 000000000..dbb47af31 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryCsv.txt @@ -0,0 +1 @@ +create table `DATASET.TABLE_NAME` (body STRING) diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryFxdlen.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryFxdlen.txt new file mode 100644 index 000000000..d4a47cbb8 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryFxdlen.txt @@ -0,0 +1 @@ +create table `DATASET.TABLE_NAME` (url STRING, fixedlength STRING) \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryhl7.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryhl7.txt new file mode 100644 index 000000000..6af1bd254 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryhl7.txt @@ -0,0 +1 @@ +create table `DATASET.TABLE_NAME` (address STRING, Body STRING) diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryCsv.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryCsv.txt new file mode 100644 index 000000000..a4f07c60a --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryCsv.txt @@ -0,0 +1,5 @@ +INSERT INTO DATASET.TABLE_NAME (body) +VALUES +('C#12,ronnie,root,1'), +('D#34,aman,,2'), +('L#89,adam,joy,3'); diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryFxdlen.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryFxdlen.txt new file mode 100644 index 000000000..9af6a9f30 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryFxdlen.txt @@ -0,0 +1,5 @@ +INSERT INTO DATASET.TABLE_NAME (url,fixedlength) +VALUES +('http://example.com:80/docs/books/tutorial/index.html?name=networking#DOWNLOADING','21 10 ABCXYZ'), +('http://geeks.com:80/docs/chair/tutorial/index.html?name=networking#DOWNLOADING','19 13 ABCXYZ'), +('http://amazing.com:80/docs/tables/tutorial/index.html?name=networking#DOWNLOADING','18 14 CDEFGH'); diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryHl7.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryHl7.txt new file mode 100644 index 000000000..9afa292e3 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryHl7.txt @@ -0,0 +1,5 @@ +INSERT INTO DATASET.TABLE_NAME (address,Body) +VALUES +('address1','MSH|^~?2||.|||199908180016||ADT^A04|ADT.1.1698593|P|3'), +('address2','MSH|^~?2||.|||199908180016||BSC^A04|ADT.1.1698593|P|4'), +('','MSH|^~?2||.|||199908180016||JKL^A04|ADT.1.1698593|P|5'); diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryparsejson.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryparsejson.txt new file mode 100644 index 000000000..dc9fa7d17 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryparsejson.txt @@ -0,0 +1,6 @@ +INSERT INTO DATASET.TABLE_NAME (body,json) +VALUES +(' hello abc', '{"id": 1, "name": {"first": "Root", "last": "joy"}, "age": 22, "pet": "nick", "height": 5.8}'), +('hello def', '{"id": 2, "name": {"first": "dded", "last": "share"}, "age": 23, "pet": "hello", "height": 6.8}'), +('hello ghi', '{"id": 3, "name": {"first": "Root", "last": "Joltie"}, "age": 24, "pet": "doms", "height": 7.8}'); + diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQuerycreateTableQueryjson.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQuerycreateTableQueryjson.txt new file mode 100644 index 000000000..be6b585ea --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQuerycreateTableQueryjson.txt @@ -0,0 +1 @@ +create table `DATASET.TABLE_NAME` (body STRING, json STRING) \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties new file mode 100644 index 000000000..3ec62490a --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties @@ -0,0 +1,28 @@ +#json file path +Directive_parse_csv=testData/Wrangler\ + /parse_csv_wrangle-cdap-data-pipeline.json +Directive_parse_json=testData/Wrangler/parse_json_Wrangle-cdap-data-pipeline (1).json +Directive_parse_Fixed_Length=testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json +Directive_parse_hl7=testData/Wrangler/parse_HL7_Wrangler-cdap-data-pipeline (1).json +bqSourceTable=dummy +sourcePath=example/hello.csv +#bq queries file path +CreateBQTableQueryFileCsv=BQtesdata/BigQuery/BigQueryCreateTableQueryCsv.txt +InsertBQDataQueryFileCsv=BQtesdata/BigQuery/BigQueryInsertDataQueryCsv.txt +CreateBQTableQueryFileJson=BQtesdata/BigQuery/BigQuerycreateTableQueryjson.txt +InsertBQDataQueryFileJson=BQtesdata/BigQuery/BigQueryInsertDataQueryparsejson.txt +CreateBQDataQueryFileFxdLen=BQtesdata/BigQuery/BigQueryCreateTableQueryFxdlen.txt +InsertBQDataQueryFileFxdLen=BQtesdata/BigQuery/BigQueryInsertDataQueryFxdlen.txt +CreateBQDataQueryFileHl7=BQtesdata/BigQuery/BigQueryCreateTableQueryhl7.txt +InsertBQDataQueryFileHl7=BQtesdata/BigQuery/BigQueryInsertDataQueryHl7.txt + +#bq properties +projectId=cdf-athena +dataset=test_automation +dataset2=Wrangler +#expectedBQFiles +ExpectedDirective_parse_csv=BQValidationExpectedFiles/Directive_parse_csv +ExpectedDirective_parse_json=BQValidationExpectedFiles/Directive_parse_json +ExpectedDirective_parse_FixedLength=BQValidationExpectedFiles/Directive_parse_fixedlength +ExpectedDirective_parse_hl7=BQValidationExpectedFiles/Directive_parse_hl7 + diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_HL7_Wrangler-cdap-data-pipeline (1).json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_HL7_Wrangler-cdap-data-pipeline (1).json new file mode 100644 index 000000000..f0c391602 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_HL7_Wrangler-cdap-data-pipeline (1).json @@ -0,0 +1,180 @@ +{ + "name": "parse_HL7_Wrangler", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "dataset": "Wrangler", + "table": "hl7finalt", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":\"string\"}]}", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "enableQueryingViews": "false" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":\"string\"}]}" + } + ], + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug", + "$$hashKey": "object:495", + "isPluginAvailable": true, + "_uiPosition": { + "left": "496px", + "top": "343px" + } + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "directives": "parse-as-hl7 :Body\nhash :Body MD5\nset-type :Body string \nkeep address,Body,Body_hl7_MSH_12,Body_hl7_MSH_9_1\nfind-and-replace address s/address1/test/g\nmask-shuffle :Body_hl7_MSH_9_1\nsend-to-error empty(address)\nrename Body_hl7_MSH_12 id ", + "field": "*", + "precondition": "false", + "workspaceId": "8317e17e-30ca-491a-8a07-56e124d53603", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body_hl7_MSH_9_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]}]}", + "on-error": "fail-pipeline" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body_hl7_MSH_9_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "BigQueryTable", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":\"string\"}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:496", + "isPluginAvailable": true, + "_uiPosition": { + "left": "796px", + "top": "343px" + } + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "Wrangler", + "table": "hlupde", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body_hl7_MSH_9_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body_hl7_MSH_9_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Wrangler", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body_hl7_MSH_9_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug", + "$$hashKey": "object:497", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1096px", + "top": "343px" + } + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "5cd279f1-4ca0-11ee-b9e7-0000007a8317" +} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_csv_wrangle-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_csv_wrangle-cdap-data-pipeline.json new file mode 100644 index 000000000..bcd2d8458 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_csv_wrangle-cdap-data-pipeline.json @@ -0,0 +1,179 @@ +{ + "name": "parse_csv_wrangle", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "dataset": "wrangler_ankit", + "table": "table5", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "enableQueryingViews": "false" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug", + "$$hashKey": "object:441", + "isPluginAvailable": true, + "_uiPosition": { + "left": "496px", + "top": "343.5px" + } + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "directives": "parse-as-csv :body ',' false\nrename body_1 new_id\nquantize body_4 body_q 1:2=20,3:4=40\nset-type :body_4 integer \ncolumns-replace s/^new_//g\nfill-null-or-empty :body_3 'shubh'\nset-headers :abc\nchange-column-case uppercase\ncleanse-column-names\nsplit-to-rows :id '#'", + "field": "*", + "precondition": "false", + "workspaceId": "b0564dfc-77db-4d95-b3bc-b04bcdf0f687", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"abc\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_2\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_4\",\"type\":[\"int\",\"null\"]},{\"name\":\"body_q\",\"type\":[\"string\",\"null\"]}]}", + "on-error": "fail-pipeline" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"abc\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_2\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_4\",\"type\":[\"int\",\"null\"]},{\"name\":\"body_q\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "BigQueryTable", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:442", + "isPluginAvailable": true, + "_uiPosition": { + "left": "796px", + "top": "343.5px" + } + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "Wrangler", + "table": "uptable", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"abc\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_2\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_4\",\"type\":[\"int\",\"null\"]},{\"name\":\"body_q\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"abc\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_2\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_4\",\"type\":[\"int\",\"null\"]},{\"name\":\"body_q\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Wrangler", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"abc\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_2\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_4\",\"type\":[\"int\",\"null\"]},{\"name\":\"body_q\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug", + "$$hashKey": "object:443", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1096px", + "top": "343.5px" + } + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "afe60ddd-4b3a-11ee-9107-0000007a4d1b" +} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json new file mode 100644 index 000000000..25135f405 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json @@ -0,0 +1,179 @@ +{ + "name": "parse_fixedlength_wrangle", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "dataset": "Wrangler", + "table": "fstab", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":\"string\"},{\"name\":\"fixedlength\",\"type\":\"string\"}]}", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "enableQueryingViews": "false" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":\"string\"},{\"name\":\"fixedlength\",\"type\":\"string\"}]}" + } + ], + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug", + "$$hashKey": "object:503", + "isPluginAvailable": true, + "_uiPosition": { + "left": "496px", + "top": "343px" + } + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "directives": "parse-as-fixed-length :fixedlength 2,4,5,3\nsplit-url url\nwrite-as-csv :url_protocol\nurl-encode :url\nurl-decode :url\nencode base32 fixedlength\ndecode base32 fixedlength_encode_base32\nsplit-to-columns :url_query '='\nrename fixedlength_2 id\nfilter-rows-on condition-true fixedlength_4 !~ 'XYZ'", + "field": "*", + "precondition": "false", + "workspaceId": "f4d30074-2193-4690-a589-2982afc0a21a", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}", + "on-error": "fail-pipeline" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "BigQueryTable", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":\"string\"},{\"name\":\"fixedlength\",\"type\":\"string\"}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:504", + "isPluginAvailable": true, + "_uiPosition": { + "left": "796px", + "top": "343px" + } + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "Wrangler", + "table": "fstabup", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Wrangler", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug", + "$$hashKey": "object:505", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1096px", + "top": "343px" + } + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "88ba63d3-4c08-11ee-81a4-0000001ad828" +} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_json_Wrangle-cdap-data-pipeline (1).json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_json_Wrangle-cdap-data-pipeline (1).json new file mode 100644 index 000000000..ccc37ccef --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_json_Wrangle-cdap-data-pipeline (1).json @@ -0,0 +1,180 @@ +{ + "name": "parse_json_Wrangle", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "dataset": "Wrangler", + "table": "jstab", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Body\",\"type\":\"string\"},{\"name\":\"json\",\"type\":\"string\"}]}", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "enableQueryingViews": "false" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Body\",\"type\":\"string\"},{\"name\":\"json\",\"type\":\"string\"}]}" + } + ], + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug", + "$$hashKey": "object:518", + "isPluginAvailable": true, + "_uiPosition": { + "left": "496px", + "top": "327px" + } + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "directives": "parse-as-json :json 1\nltrim :Body\nset-column :desc concat(json_pet, \", \", body)\ncopy :json_name :copied\nswap :json_id :json_age\nmerge :json_id :json_name :json_id_json_name ,\nmask-number :json_pet 'testing'\ndrop json_height\nwrite-as-json-map :json_age\nrename json_id id", + "field": "*", + "precondition": "false", + "workspaceId": "d496b8e4-ca6f-4d38-9877-d76bb52c218e", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"Body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_age\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_name\",\"type\":[{\"type\":\"record\",\"name\":\"json_name05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"json_pet\",\"type\":[\"string\",\"null\"]},{\"name\":\"desc\",\"type\":[\"string\",\"null\"]},{\"name\":\"copied\",\"type\":[{\"type\":\"record\",\"name\":\"copied05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"json_id_json_name\",\"type\":[\"string\",\"null\"]}]}", + "on-error": "fail-pipeline" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"Body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_age\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_name\",\"type\":[{\"type\":\"record\",\"name\":\"json_name05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"json_pet\",\"type\":[\"string\",\"null\"]},{\"name\":\"desc\",\"type\":[\"string\",\"null\"]},{\"name\":\"copied\",\"type\":[{\"type\":\"record\",\"name\":\"copied05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"json_id_json_name\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "BigQueryTable", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Body\",\"type\":\"string\"},{\"name\":\"json\",\"type\":\"string\"}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:519", + "isPluginAvailable": true, + "_uiPosition": { + "left": "796px", + "top": "327px" + } + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "Wrangler", + "table": "jstabsupd", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"Body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_age\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_name\",\"type\":[{\"type\":\"record\",\"name\":\"json_name05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"json_pet\",\"type\":[\"string\",\"null\"]},{\"name\":\"desc\",\"type\":[\"string\",\"null\"]},{\"name\":\"copied\",\"type\":[{\"type\":\"record\",\"name\":\"copied05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"json_id_json_name\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"Body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_age\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_name\",\"type\":[{\"type\":\"record\",\"name\":\"json_name05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"json_pet\",\"type\":[\"string\",\"null\"]},{\"name\":\"desc\",\"type\":[\"string\",\"null\"]},{\"name\":\"copied\",\"type\":[{\"type\":\"record\",\"name\":\"copied05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"json_id_json_name\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Wrangler", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"Body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_age\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_name\",\"type\":[{\"type\":\"record\",\"name\":\"json_name05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"json_pet\",\"type\":[\"string\",\"null\"]},{\"name\":\"desc\",\"type\":[\"string\",\"null\"]},{\"name\":\"copied\",\"type\":[{\"type\":\"record\",\"name\":\"copied05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"json_id_json_name\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug", + "$$hashKey": "object:520", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1096px", + "top": "327px" + } + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "516e13c0-4c05-11ee-b70a-0000001ecd5c" +} \ No newline at end of file