Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CDAP-15855: Updated validation API. #7

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<cdap.version>6.0.0-SNAPSHOT</cdap.version>
<cdap.version>6.1.0-SNAPSHOT</cdap.version>
<commons-lang.version>2.6</commons-lang.version>
<guava.version>19.0</guava.version>
<redshift-jdbc.version>1.2.1.1001</redshift-jdbc.version>
Expand All @@ -76,7 +76,7 @@
<widgets.dir>widgets</widgets.dir>
<docs.dir>docs</docs.dir>
<app.parents>
system:cdap-data-pipeline[6.0.0-SNAPSHOT,7.0.0-SNAPSHOT),system:cdap-data-streams[6.0.0-SNAPSHOT,7.0.0-SNAPSHOT)
system:cdap-data-pipeline[6.1.0-SNAPSHOT,7.0.0-SNAPSHOT),system:cdap-data-streams[6.1.0-SNAPSHOT,7.0.0-SNAPSHOT)
</app.parents>
<!-- this is here because project.basedir evaluates to null in the script build step -->
<main.basedir>${project.basedir}</main.basedir>
Expand Down
76 changes: 52 additions & 24 deletions src/main/java/io/cdap/plugin/ToUTF8Action.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,12 @@
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.api.common.Bytes;
import io.cdap.cdap.api.plugin.PluginConfig;
import io.cdap.cdap.etl.api.FailureCollector;
import io.cdap.cdap.etl.api.PipelineConfigurer;
import io.cdap.cdap.etl.api.StageConfigurer;
import io.cdap.cdap.etl.api.action.Action;
import io.cdap.cdap.etl.api.action.ActionContext;
import io.cdap.cdap.etl.api.validation.ValidationException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
Expand Down Expand Up @@ -63,14 +66,15 @@ public ToUTF8Action(ToUTF8Config config) {
}

@Override
public void configurePipeline(PipelineConfigurer pipelineConfigurer) throws IllegalArgumentException {
public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
super.configurePipeline(pipelineConfigurer);
config.validate();
StageConfigurer stageConfigurer = pipelineConfigurer.getStageConfigurer();
config.validate(stageConfigurer.getFailureCollector());
}

@Override
public void run(ActionContext context) throws Exception {
config.validate();
config.validate(context.getFailureCollector());
Path source = new Path(config.sourceFilePath);
Path dest = new Path(config.destFilePath);

Expand All @@ -83,7 +87,7 @@ public void run(ActionContext context) throws Exception {
} else {
// Convert all the files in a directory
PathFilter filter = new PathFilter() {
private final Pattern pattern = Pattern.compile(config.fileRegex);
private final Pattern pattern = Pattern.compile(config.getFileRegex());

@Override
public boolean accept(Path path) {
Expand All @@ -98,7 +102,7 @@ public boolean accept(Path path) {

if (listFiles.length == 0) {
LOG.warn("Not converting any files from source {} matching regular expression",
source.toString(), config.fileRegex);
source.toString(), config.getFileRegex());
}

if (fileSystem.exists(dest) && fileSystem.isFile(dest)) {
Expand Down Expand Up @@ -133,7 +137,7 @@ private void convertSingleFile(Path source, Path dest, FileSystem fileSystem) th
out.flush();
out.close();
} catch (IOException e) {
if (!config.continueOnError) {
if (!config.getContinueOnError()) {
throw new IOException(String.format("Failed to convert file %s to %s", source.toString(), dest.toString()));
}
LOG.warn(String.format("Exception convert file %s to %s", source.toString(), dest.toString()), e);
Expand All @@ -144,18 +148,28 @@ private void convertSingleFile(Path source, Path dest, FileSystem fileSystem) th
* Config class that contains all properties required for running the unload command.
*/
public static class ToUTF8Config extends PluginConfig {

public static final String SOURCE_FILE_PATH = "sourceFilePath";
public static final String DEST_FILE_PATH = "destFilePath";
public static final String FILE_REGEX = "fileRegex";
public static final String CHARSET = "charset";

@Name(SOURCE_FILE_PATH)
@Macro
@Description("The source location where the file or files live. You can use glob syntax here such as *.dat.")
private String sourceFilePath;

@Name(DEST_FILE_PATH)
@Macro
@Description("The destination location where the converted files should be.")
private String destFilePath;

@Name(CHARSET)
@Macro
@Description("The source charset.")
private String charset;

@Name(FILE_REGEX)
@Macro
@Nullable
@Description("A regular expression for filtering files such as .*\\.txt")
Expand All @@ -166,6 +180,13 @@ public static class ToUTF8Config extends PluginConfig {
@Description("Set to true if this plugin should ignore errors.")
private Boolean continueOnError;

public String getFileRegex() {
return (Strings.isNullOrEmpty(fileRegex)) ? ".*" : fileRegex;
}

public boolean getContinueOnError() {
return continueOnError == null ? false : continueOnError;
}

public ToUTF8Config(String sourceFilePath, String destFilePath, @Nullable String fileRegex,
String charset, @Nullable Boolean continueOnError) {
Expand All @@ -179,31 +200,38 @@ public ToUTF8Config(String sourceFilePath, String destFilePath, @Nullable String
/**
* Validates the config parameters required for unloading the data.
*/
private void validate() throws IllegalArgumentException {
try {
Charset.forName(charset);
} catch (UnsupportedCharsetException e) {
throw new IllegalArgumentException("The charset entered is not valid. Please use a value " +
"from https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html.", e);
private void validate(FailureCollector collector) throws ValidationException {
if (Strings.isNullOrEmpty(sourceFilePath)) {
collector.addFailure("Source file or folder is required.", "Set source file or folder.")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the error message and the corrective action are basically the same (like here), it's better to just have the message and leave the corrective action null.

.withConfigProperty(SOURCE_FILE_PATH);
collector.getOrThrowException();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need to throw exception here? This would just validate one property and throw an exception. we should validate other properties as well before throwing an exception

}
try {
Pattern.compile(fileRegex);
} catch (Exception e) {
throw new IllegalArgumentException("The regular expression pattern provided is not a valid " +
"regular expression.", e);
}
if (Strings.isNullOrEmpty(sourceFilePath)) {
throw new IllegalArgumentException("Source file or folder is required.");
Path source = new Path(sourceFilePath);
source.getFileSystem(new Configuration());
} catch (IOException e) {
collector.addFailure("Cannot determine the file system of the source file.", null)
.withConfigProperty(SOURCE_FILE_PATH).withStacktrace(e.getStackTrace());
}
if (Strings.isNullOrEmpty(destFilePath)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this check can be moved up to be validated along with source path

throw new IllegalArgumentException("Destination file or folder is required.");
collector.addFailure("Destination file or folder is required.",
"Set destination file or folder.").withConfigProperty(DEST_FILE_PATH);
}
try {
Path source = new Path(sourceFilePath);
FileSystem fileSystem = source.getFileSystem(new Configuration());
} catch (IOException e) {
throw new IllegalArgumentException("Cannot determine the file system of the source file.", e);
Pattern.compile(getFileRegex());
} catch (Exception e) {
collector.addFailure("The regular expression pattern provided is not a valid " +
"regular expression.", "Set correct regular expression pattern.")
.withConfigProperty(FILE_REGEX);
}
try {
Charset.forName(charset);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this can be validated along with source path.

} catch (UnsupportedCharsetException e) {
collector.addFailure(e.getMessage(), "The charset entered is not valid. Please use a value " +
"from https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html.")
.withConfigProperty(CHARSET);
}
collector.getOrThrowException();
}
}
}
91 changes: 87 additions & 4 deletions src/test/java/io/cdap/plugin/ToUTF8ConfigTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@

package io.cdap.plugin;

import io.cdap.cdap.etl.api.validation.CauseAttributes;
import io.cdap.cdap.etl.api.validation.ValidationException;
import io.cdap.cdap.etl.api.validation.ValidationFailure;
import io.cdap.cdap.etl.mock.action.MockActionContext;
import io.cdap.cdap.etl.mock.common.MockPipelineConfigurer;
import org.junit.ClassRule;
import org.junit.Test;
Expand All @@ -34,6 +38,8 @@
public class ToUTF8ConfigTest {
private static final String ISO_8859_FILE_NAME = "20150320_clo_prod_cln.dat";
private static final String UTF_8_FILE_NAME = "20150320_clo_prod_cln.dat.utf8";
private static final String MOCK_NAME = "mockstage";
private static final String STAGE = "stage";

private FileFilter filter = new FileFilter() {
private final Pattern pattern = Pattern.compile("[^\\.].*\\.utf8");
Expand All @@ -57,7 +63,8 @@ public void testSingleFile() throws Exception {
null, "ISO-8859-1", false);
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null);
new ToUTF8Action(config).configurePipeline(configurer);
new ToUTF8Action(config).run(null);
MockActionContext context = new MockActionContext();
new ToUTF8Action(config).run(context);
assertEquals(1, destFolder.listFiles(filter).length);
assertEquals(UTF_8_FILE_NAME, destFolder.listFiles(filter)[0].getName());
}
Expand All @@ -73,7 +80,8 @@ public void testFolder() throws Exception {
null, "ISO-8859-1", false);
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null);
new ToUTF8Action(config).configurePipeline(configurer);
new ToUTF8Action(config).run(null);
MockActionContext context = new MockActionContext();
new ToUTF8Action(config).run(context);
assertEquals(1, destFolder.listFiles(filter).length);
assertEquals(UTF_8_FILE_NAME, destFolder.listFiles(filter)[0].getName());
}
Expand All @@ -90,7 +98,8 @@ public void testFolderWithGlob() throws Exception {
null, "ISO-8859-1", false);
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null);
new ToUTF8Action(config).configurePipeline(configurer);
new ToUTF8Action(config).run(null);
MockActionContext context = new MockActionContext();
new ToUTF8Action(config).run(context);
assertEquals(1, destFolder.listFiles(filter).length);
assertEquals(UTF_8_FILE_NAME, destFolder.listFiles(filter)[0].getName());
}
Expand All @@ -106,8 +115,82 @@ public void testFolderWithRegEx() throws Exception {
".*\\.dat", "ISO-8859-1", false);
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null);
new ToUTF8Action(config).configurePipeline(configurer);
new ToUTF8Action(config).run(null);
MockActionContext context = new MockActionContext();
new ToUTF8Action(config).run(context);
assertEquals(1, destFolder.listFiles(filter).length);
assertEquals(UTF_8_FILE_NAME, destFolder.listFiles(filter)[0].getName());
}

@Test
public void testValidationEmptySourcePath() {
ToUTF8Action.ToUTF8Config config = new ToUTF8Action.ToUTF8Config("", "",
".*\\.dat", "ISO-8859-1", false);
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null);
try {
new ToUTF8Action(config).configurePipeline(configurer);
} catch (ValidationException e) {
assertEquals(1, e.getFailures().size());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we also add unit tests where more than 1 failures are collected?

ValidationFailure validationFailure = e.getFailures().get(0);
assertEquals("Source file or folder is required.", validationFailure.getMessage());
assertEquals(1, validationFailure.getCauses().size());
ValidationFailure.Cause cause = validationFailure.getCauses().get(0);
assertEquals(ToUTF8Action.ToUTF8Config.SOURCE_FILE_PATH, cause.getAttribute(CauseAttributes.STAGE_CONFIG));
assertEquals(MOCK_NAME, cause.getAttribute(STAGE));
}
}

@Test
public void testValidationEmptyDestinationPath() {
ToUTF8Action.ToUTF8Config config = new ToUTF8Action.ToUTF8Config("_12_test", "",
".*\\.dat", "ISO-8859-1", false);
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null);
try {
new ToUTF8Action(config).configurePipeline(configurer);
} catch (ValidationException e) {
assertEquals(1, e.getFailures().size());
ValidationFailure validationFailure = e.getFailures().get(0);
assertEquals("Destination file or folder is required.", validationFailure.getMessage());
assertEquals(1, validationFailure.getCauses().size());
ValidationFailure.Cause cause = validationFailure.getCauses().get(0);
assertEquals(ToUTF8Action.ToUTF8Config.DEST_FILE_PATH, cause.getAttribute(CauseAttributes.STAGE_CONFIG));
assertEquals(MOCK_NAME, cause.getAttribute(STAGE));
}
}

@Test
public void testValidationFileRegularExpression() {
ToUTF8Action.ToUTF8Config config = new ToUTF8Action.ToUTF8Config("source_path", "dest_path",
"*\\.dat", "ISO-8859-1", false);
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null);
try {
new ToUTF8Action(config).configurePipeline(configurer);
} catch (ValidationException e) {
assertEquals(1, e.getFailures().size());
ValidationFailure validationFailure = e.getFailures().get(0);
assertEquals("The regular expression pattern provided is not a valid regular expression.",
validationFailure.getMessage());
assertEquals(1, validationFailure.getCauses().size());
ValidationFailure.Cause cause = validationFailure.getCauses().get(0);
assertEquals(ToUTF8Action.ToUTF8Config.FILE_REGEX, cause.getAttribute(CauseAttributes.STAGE_CONFIG));
assertEquals(MOCK_NAME, cause.getAttribute(STAGE));
}
}

@Test
public void testValidationCharacterSet() {
ToUTF8Action.ToUTF8Config config = new ToUTF8Action.ToUTF8Config("source_path", "dest_path",
".*\\.dat", "ISO-885-1", false);
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null);
try {
new ToUTF8Action(config).configurePipeline(configurer);
} catch (ValidationException e) {
assertEquals(1, e.getFailures().size());
ValidationFailure validationFailure = e.getFailures().get(0);
assertEquals("ISO-885-1", validationFailure.getMessage());
assertEquals(1, validationFailure.getCauses().size());
ValidationFailure.Cause cause = validationFailure.getCauses().get(0);
assertEquals(ToUTF8Action.ToUTF8Config.CHARSET, cause.getAttribute(CauseAttributes.STAGE_CONFIG));
assertEquals(MOCK_NAME, cause.getAttribute(STAGE));
}
}
}