-
Notifications
You must be signed in to change notification settings - Fork 56
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ae63527
commit 5e7ddd8
Showing
7 changed files
with
290 additions
and
16 deletions.
There are no files selected for viewing
43 changes: 43 additions & 0 deletions
43
wrangler-transform/src/e2e-test/features/Wrangler/Runtime.feature
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Copyright © 2023 Cask Data, Inc. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not | ||
# use this file except in compliance with the License. You may obtain a copy of | ||
# the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | ||
# License for the specific language governing permissions and limitations under | ||
# the License. | ||
|
||
# Runtime (deploy-and-run) e2e coverage for the Wrangler transform. | ||
@Wrangler | ||
Feature: Wrangler - Run time scenarios | ||
|
||
# Imports a saved BigQuery -> Wrangler -> Group By -> BigQuery pipeline, repoints | ||
# its source and sink, runs it, and compares the sink table with an expected file. | ||
# NOTE(review): the two tags below presumably trigger hooks that create and clean | ||
# up the BigQuery tables; those hooks are not visible here - confirm. | ||
@BQ_SOURCE_TEST @BQ_SINK_TEST | ||
Scenario: To verify User is able to run a pipeline using wrangler and groupBy directive | ||
Given Open Datafusion Project to configure pipeline | ||
Then Click on the Plus Green Button to import the pipelines | ||
# "Directive_GroupBy" is a key in pluginParameters.properties that points at the pipeline JSON. | ||
Then Select the file for importing the pipeline for the plugin "Directive_GroupBy" | ||
# Source stage. The quoted values are presumably property keys resolved at run | ||
# time rather than literal values - confirm against the step definitions. | ||
Then Navigate to the properties page of plugin: "BigQueryTable" | ||
Then Replace input plugin property: "project" with value: "projectId" | ||
Then Replace input plugin property: "dataset" with value: "dataset" | ||
Then Replace input plugin property: "table" with value: "bqSourceTable" | ||
Then Click on the Get Schema button | ||
Then Click on the Validate button | ||
Then Close the Plugin Properties page | ||
# Sink stage. NOTE(review): "bqTargetTable" is not defined in the properties | ||
# file shown with this change; presumably it is set by a test hook - confirm. | ||
Then Navigate to the properties page of plugin: "BigQuery2" | ||
Then Replace input plugin property: "project" with value: "projectId" | ||
Then Replace input plugin property: "table" with value: "bqTargetTable" | ||
Then Replace input plugin property: "dataset" with value: "dataset" | ||
Then Click on the Validate button | ||
Then Close the Plugin Properties page | ||
# Deploy and run; the run must report "Succeeded" before the data is validated. | ||
Then Rename the pipeline | ||
Then Deploy the pipeline | ||
Then Run the Pipeline in Runtime | ||
Then Wait till pipeline is in running state | ||
Then Open and capture logs | ||
Then Verify the pipeline status is "Succeeded" | ||
Then Close the pipeline logs | ||
# "ExpectedDirective_GroupBy" is a key in pluginParameters.properties that points at the expected-output file. | ||
Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_GroupBy" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
5 changes: 5 additions & 0 deletions
5
...ler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_wrangler_GroupBy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{"city":"San Jose","cityFirst":"San Jose","firstname":"DOUGLAS","id":"1","lastname":"Williams","state":"CA","zipcode":923564293} | ||
{"city":"Houston","cityFirst":"Houston","firstname":"DAVID","id":"2","lastname":"Johnson","state":"TX","zipcode":1738378970} | ||
{"city":"Manhattan","cityFirst":"Manhattan","firstname":"HUGH","id":"3","lastname":"Jackman","state":"NY","zipcode":-1863622247} | ||
{"city":"San Diego","cityFirst":"San Diego","firstname":"FRANK","id":"5","lastname":"Underwood","state":"CA","zipcode":-1317090526} | ||
{"city":"New York","cityFirst":"New York","firstname":"SARTHAK","id":"7","lastname":"Dash","state":"NY","zipcode":-1949601773} |
2 changes: 2 additions & 0 deletions
2
wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQuery.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
/* Source table definition for the Wrangler BigQuery e2e scenarios. | ||
   The back-quoted path is a placeholder that the test harness is expected | ||
   to substitute before the statement is executed. */ | ||
CREATE TABLE `DATASET.TABLE_NAME` ( | ||
    id STRING, | ||
    firstname STRING, | ||
    lastname STRING, | ||
    streetAddress STRING, | ||
    city STRING, | ||
    state STRING, | ||
    zipcode BIGINT, | ||
    phoneNumber BIGINT | ||
) |
10 changes: 10 additions & 0 deletions
10
wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQuery.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
/* Seed rows for the source table used by the Wrangler e2e scenarios. | ||
   The table path is back-quoted, matching the create-table statement, so a | ||
   substituted name containing a hyphen or a reserved word still parses. | ||
   Several zipcode values do not fit in a signed 32-bit integer; the GroupBy | ||
   expected-output fixture stores their wrapped 32-bit equivalents, so changing | ||
   a value here means regenerating that fixture. Row '8' has a NULL zipcode. */ | ||
INSERT INTO `DATASET.TABLE_NAME` ( | ||
    id, firstname, lastname, streetAddress, city, state, zipcode, phoneNumber | ||
) | ||
VALUES | ||
    ('5', 'Frank', 'Underwood', '1609 Far St.', 'San Diego', 'CA', 2977876770, 19061512345), | ||
    ('1', 'Douglas', 'Williams', '1 Vista Montana', 'San Jose', 'CA', 9513498885, 35834612345), | ||
    ('4', 'Walter', 'White', '3828 Piermont Dr', 'Orlando', 'FL', 7349864532, 7829812345), | ||
    ('3', 'Hugh', 'Jackman', '5, Cool Way', 'Manhattan', 'NY', 6726312345, 1695412345), | ||
    ('7', 'Sarthak', 'Dash', '123 Far St.', 'New York', 'NY', 2345365523, 1324812345), | ||
    ('6', 'Serena', 'Woods', '123 Far St.', 'Las Vegas', 'NV', 4533456734, 78919612345), | ||
    ('2', 'David', 'Johnson', '3 Baypointe Parkway', 'Houston', 'TX', 1738378970, 1451412345), | ||
    ('8', 'Rahul', 'Dash', '22 MG Road.', 'Bangalore', 'KA', NULL, 94864612345); |
9 changes: 4 additions & 5 deletions
9
wrangler-transform/src/e2e-test/resources/pluginParameters.properties
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,15 @@ | ||
#json file path | ||
Directive_parse_csv=testData/Wrangler\ | ||
/parse_csv_wrangle-cdap-data-pipeline.json | ||
Directive_GroupBy=testData/Wrangler/BQ2BQwithWrnglerNGrpby-cdap-data-pipeline (1).json | ||
bqSourceTable=dummy | ||
sourcePath=example/hello.csv | ||
gcsSourceBucket=dummy | ||
#bq queries file path | ||
CreateBQTableQueryFileCsv=BQtesdata/BigQuery/BigQueryCreateTableQueryCsv.txt | ||
InsertBQDataQueryFileCsv=BQtesdata/BigQuery/BigQueryInsertDataQueryCsv.txt | ||
CreateBQTableQueryFile=BQtesdata/BigQuery/BigQueryCreateTableQuery.txt | ||
InsertBQDataQueryFile=BQtesdata/BigQuery/BigQueryInsertDataQuery.txt | ||
|
||
#bq properties | ||
projectId=cdf-athena | ||
dataset=test_automation | ||
dataset2=Wrangler | ||
#expectedBQFiles | ||
ExpectedDirective_parse_csv=BQValidationExpectedFiles/Directive_parse_csv | ||
ExpectedDirective_GroupBy=BQValidationExpectedFiles/Directive_wrangler_GroupBy |
223 changes: 223 additions & 0 deletions
223
...c/e2e-test/resources/testData/Wrangler/BQ2BQwithWrnglerNGrpby-cdap-data-pipeline (1).json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,223 @@ | ||
{ | ||
"name": "BQ2BQwithWrnglerNGrpby", | ||
"description": "Data Pipeline Application", | ||
"artifact": { | ||
"name": "cdap-data-pipeline", | ||
"version": "6.10.0-SNAPSHOT", | ||
"scope": "SYSTEM" | ||
}, | ||
"config": { | ||
"resources": { | ||
"memoryMB": 2048, | ||
"virtualCores": 1 | ||
}, | ||
"driverResources": { | ||
"memoryMB": 2048, | ||
"virtualCores": 1 | ||
}, | ||
"connections": [ | ||
{ | ||
"from": "BigQueryTable", | ||
"to": "Wrangler" | ||
}, | ||
{ | ||
"from": "Wrangler", | ||
"to": "Group By" | ||
}, | ||
{ | ||
"from": "Group By", | ||
"to": "BigQuery2" | ||
} | ||
], | ||
"postActions": [], | ||
"properties": {}, | ||
"processTimingEnabled": true, | ||
"stageLoggingEnabled": true, | ||
"stages": [ | ||
{ | ||
"name": "BigQueryTable", | ||
"plugin": { | ||
"name": "BigQueryTable", | ||
"type": "batchsource", | ||
"label": "BigQueryTable", | ||
"artifact": { | ||
"name": "google-cloud", | ||
"version": "0.23.0-SNAPSHOT", | ||
"scope": "SYSTEM" | ||
}, | ||
"properties": { | ||
"useConnection": "false", | ||
"dataset": "wrangler_ankit", | ||
"table": "joinerTest", | ||
"schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"streetAddress\",\"type\":[\"string\",\"null\"]},{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"long\",\"null\"]},{\"name\":\"phoneNumber\",\"type\":[\"long\",\"null\"]}]}", | ||
"enableQueryingViews": "false", | ||
"project": "auto-detect", | ||
"serviceAccountType": "filePath", | ||
"serviceFilePath": "auto-detect" | ||
} | ||
}, | ||
"outputSchema": [ | ||
{ | ||
"name": "etlSchemaBody", | ||
"schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"streetAddress\",\"type\":[\"string\",\"null\"]},{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"long\",\"null\"]},{\"name\":\"phoneNumber\",\"type\":[\"long\",\"null\"]}]}" | ||
} | ||
], | ||
"id": "BigQueryTable", | ||
"type": "batchsource", | ||
"label": "BigQueryTable", | ||
"icon": "fa-plug", | ||
"$$hashKey": "object:1585", | ||
"isPluginAvailable": true, | ||
"_uiPosition": { | ||
"left": "346px", | ||
"top": "343px" | ||
} | ||
}, | ||
{ | ||
"name": "Wrangler", | ||
"plugin": { | ||
"name": "Wrangler", | ||
"type": "transform", | ||
"label": "Wrangler", | ||
"artifact": { | ||
"name": "wrangler-transform", | ||
"version": "4.10.0-SNAPSHOT", | ||
"scope": "SYSTEM" | ||
}, | ||
"properties": { | ||
"directives": "drop phonenumber\nuppercase :firstname\nset-type :zipcode integer \nfind-and-replace :streetAddress s/St./Street/Ig\nset-column :lastname_count string:length(lastname)\nfilter-rows-on regex-match lastname_count .*5.*\nfilter-rows-on condition-true zipcode == null || zipcode =~ \"^\\W*$\"", | ||
"field": "*", | ||
"precondition": "false", | ||
"workspaceId": "fb521d04-7644-4ec4-b545-837980f402cf", | ||
"schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"streetAddress\",\"type\":[\"string\",\"null\"]},{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"lastname_count\",\"type\":[\"int\",\"null\"]}]}", | ||
"on-error": "fail-pipeline" | ||
} | ||
}, | ||
"outputSchema": [ | ||
{ | ||
"name": "etlSchemaBody", | ||
"schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"streetAddress\",\"type\":[\"string\",\"null\"]},{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"lastname_count\",\"type\":[\"int\",\"null\"]}]}" | ||
} | ||
], | ||
"inputSchema": [ | ||
{ | ||
"name": "BigQueryTable", | ||
"schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"streetAddress\",\"type\":[\"string\",\"null\"]},{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"long\",\"null\"]},{\"name\":\"phoneNumber\",\"type\":[\"long\",\"null\"]}]}" | ||
} | ||
], | ||
"id": "Wrangler", | ||
"type": "transform", | ||
"label": "Wrangler", | ||
"icon": "icon-DataPreparation", | ||
"$$hashKey": "object:1586", | ||
"isPluginAvailable": true, | ||
"_uiPosition": { | ||
"left": "646px", | ||
"top": "343px" | ||
} | ||
}, | ||
{ | ||
"name": "Group By", | ||
"plugin": { | ||
"name": "GroupByAggregate", | ||
"type": "batchaggregator", | ||
"label": "Group By", | ||
"artifact": { | ||
"name": "core-plugins", | ||
"version": "2.12.0-SNAPSHOT", | ||
"scope": "SYSTEM" | ||
}, | ||
"properties": { | ||
"groupByFields": "city,firstname,lastname,state,zipcode,id", | ||
"aggregates": "cityFirst:First(city)" | ||
} | ||
}, | ||
"outputSchema": [ | ||
{ | ||
"name": "etlSchemaBody", | ||
"schema": "{\"type\":\"record\",\"name\":\"record.typeagg\",\"fields\":[{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"cityFirst\",\"type\":[\"string\",\"null\"]}]}" | ||
} | ||
], | ||
"inputSchema": [ | ||
{ | ||
"name": "Wrangler", | ||
"schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"streetAddress\",\"type\":[\"string\",\"null\"]},{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"lastname_count\",\"type\":[\"int\",\"null\"]}]}" | ||
} | ||
], | ||
"id": "Group-By", | ||
"type": "batchaggregator", | ||
"label": "Group By", | ||
"icon": "icon-groupbyaggregate", | ||
"$$hashKey": "object:1587", | ||
"isPluginAvailable": true, | ||
"_uiPosition": { | ||
"left": "946px", | ||
"top": "343px" | ||
} | ||
}, | ||
{ | ||
"name": "BigQuery2", | ||
"plugin": { | ||
"name": "BigQueryTable", | ||
"type": "batchsink", | ||
"label": "BigQuery2", | ||
"artifact": { | ||
"name": "google-cloud", | ||
"version": "0.23.0-SNAPSHOT", | ||
"scope": "SYSTEM" | ||
}, | ||
"properties": { | ||
"useConnection": "false", | ||
"project": "auto-detect", | ||
"serviceAccountType": "filePath", | ||
"serviceFilePath": "auto-detect", | ||
"dataset": "wrangler_ankit", | ||
"table": "joinTestOutput", | ||
"operation": "insert", | ||
"truncateTable": "false", | ||
"allowSchemaRelaxation": "false", | ||
"location": "US", | ||
"createPartitionedTable": "false", | ||
"partitioningType": "TIME", | ||
"partitionFilterRequired": "false", | ||
"schema": "{\"type\":\"record\",\"name\":\"record.typeagg\",\"fields\":[{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"cityFirst\",\"type\":[\"string\",\"null\"]}]}" | ||
} | ||
}, | ||
"outputSchema": [ | ||
{ | ||
"name": "etlSchemaBody", | ||
"schema": "{\"type\":\"record\",\"name\":\"record.typeagg\",\"fields\":[{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"cityFirst\",\"type\":[\"string\",\"null\"]}]}" | ||
} | ||
], | ||
"inputSchema": [ | ||
{ | ||
"name": "Group By", | ||
"schema": "{\"type\":\"record\",\"name\":\"record.typeagg\",\"fields\":[{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"cityFirst\",\"type\":[\"string\",\"null\"]}]}" | ||
} | ||
], | ||
"id": "BigQuery2", | ||
"type": "batchsink", | ||
"label": "BigQuery2", | ||
"icon": "fa-plug", | ||
"$$hashKey": "object:1588", | ||
"isPluginAvailable": true, | ||
"_uiPosition": { | ||
"left": "1246px", | ||
"top": "343px" | ||
} | ||
} | ||
], | ||
"schedule": "0 1 */1 * *", | ||
"engine": "spark", | ||
"numOfRecordsPreview": 100, | ||
"rangeRecordsPreview": { | ||
"min": 1, | ||
"max": "5000" | ||
}, | ||
"description": "Data Pipeline Application", | ||
"maxConcurrentRuns": 1, | ||
"pushdownEnabled": false, | ||
"transformationPushdown": {} | ||
}, | ||
"version": "714034ca-5154-11ee-9b22-000000505066" | ||
} |