diff --git a/cdap-runtime-ext-dataproc/src/main/java/io/cdap/cdap/runtime/spi/common/DataprocUtils.java b/cdap-runtime-ext-dataproc/src/main/java/io/cdap/cdap/runtime/spi/common/DataprocUtils.java index 584d7b4f69c7..ce13b245391e 100644 --- a/cdap-runtime-ext-dataproc/src/main/java/io/cdap/cdap/runtime/spi/common/DataprocUtils.java +++ b/cdap-runtime-ext-dataproc/src/main/java/io/cdap/cdap/runtime/spi/common/DataprocUtils.java @@ -25,6 +25,7 @@ import com.google.cloud.storage.BlobInfo; import com.google.cloud.storage.Storage; import com.google.cloud.storage.StorageBatch; +import com.google.common.base.Splitter; import com.google.common.base.Strings; import com.google.common.collect.ImmutableMap; import com.google.common.io.CharStreams; @@ -40,6 +41,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.SplittableRandom; @@ -175,13 +177,69 @@ public static Map parseKeyValueConfig(@Nullable String configVal } for (String property : configValue.split(delimiter)) { String[] parts = property.split(keyValueDelimiter, 2); - String key = parts[0].trim(); - String value = parts.length > 1 ? parts[1].trim() : ""; + if (parts.length != 2) { + throw new IllegalArgumentException("Invalid KeyValue " + property); + } + String key = parts[0]; + String value = parts[1]; map.put(key, value); } return map; } + /** + * Parses labels that are expected to be of the form key1=val1,key2=val2 into a map of key + * values. + * + *

If a label key or value is invalid, a message will be logged but the key-value will not be + * returned in the map. Keys and values cannot be longer than 63 characters. Keys and values can + * only contain lowercase letters, numeric characters, underscores, and dashes. Keys must start + * with a lowercase letter and must not be empty. + * + *

If a label is given without a '=', the label value will be empty. If a label is given as + * 'key=', the label value will be empty. If a label has multiple '=', it will be ignored. For + * example, 'key=val1=val2' will be ignored. + * + * @param labelsStr the labels string to parse + * @return valid labels from the parsed string + */ + public static Map parseLabels(String labelsStr, + String labelDelimiter, String keyValueDelimiter) { + Splitter labelSplitter = Splitter.on(labelDelimiter).trimResults().omitEmptyStrings(); + Splitter kvSplitter = Splitter.on(keyValueDelimiter).trimResults().omitEmptyStrings(); + + Map validLabels = new HashMap<>(); + for (String keyvalue : labelSplitter.split(labelsStr)) { + Iterator iter = kvSplitter.split(keyvalue).iterator(); + if (!iter.hasNext()) { + continue; + } + String key = iter.next(); + String val = iter.hasNext() ? iter.next() : ""; + if (iter.hasNext()) { + LOG.warn("Ignoring invalid label {}. Labels should be of the form 'key{}val' or just 'key'", + keyvalue, keyValueDelimiter); + continue; + } + if (!LABEL_KEY_PATTERN.matcher(key).matches()) { + LOG.warn( + "Ignoring invalid label key {}. Label keys cannot be longer than 63 characters, must start with " + + "a lowercase letter, and can only contain lowercase letters, numeric characters, underscores," + + " and dashes.", key); + continue; + } + if (!LABEL_VAL_PATTERN.matcher(val).matches()) { + LOG.warn( + "Ignoring invalid label value {}. Label values cannot be longer than 63 characters, " + + "and can only contain lowercase letters, numeric characters, underscores, and dashes.", + val); + continue; + } + validLabels.put(key, val); + } + return validLabels; + } + /** * Parses the given list of IP CIDR blocks into list of {@link IPRange}. */ diff --git a/cdap-runtime-ext-dataproc/src/main/java/io/cdap/cdap/runtime/spi/provisioner/dataproc/AbstractDataprocProvisioner.java b/cdap-runtime-ext-dataproc/src/main/java/io/cdap/cdap/runtime/spi/provisioner/dataproc/AbstractDataprocProvisioner.java index e972009462d4..326a22093dc0 100644 --- a/cdap-runtime-ext-dataproc/src/main/java/io/cdap/cdap/runtime/spi/provisioner/dataproc/AbstractDataprocProvisioner.java +++ b/cdap-runtime-ext-dataproc/src/main/java/io/cdap/cdap/runtime/spi/provisioner/dataproc/AbstractDataprocProvisioner.java @@ -284,7 +284,12 @@ protected final Map getCommonDataprocLabels(ProvisionerContext p // specified in cdap-site. This is to ensure consistent delimiters for // provisioner properties. String provisionerLabelsStr = provisionerContext.getProperties().get(LABELS_PROPERTY); - labels.putAll(DataprocUtils.parseKeyValueConfig(provisionerLabelsStr, ";", "\\|")); + // the UI never sends nulls, it only sends empty strings. We need to ignore + // both null and empty strings. + if (!Strings.isNullOrEmpty(provisionerLabelsStr)) { + labels.putAll( + DataprocUtils.parseLabels(provisionerLabelsStr, ";", "|")); + } // Add system labels. ProvisionerSystemContext systemContext = getSystemContext(); @@ -295,7 +300,9 @@ protected final Map getCommonDataprocLabels(ProvisionerContext p // labels are expected to be in format: // name1=val1,name2=val2 - labels.putAll(DataprocUtils.parseKeyValueConfig(extraLabelsStr, ",", "=")); + if (extraLabelsStr != null) { + labels.putAll(DataprocUtils.parseLabels(extraLabelsStr, ",", "=")); + } return Collections.unmodifiableMap(labels); } diff --git a/cdap-runtime-ext-dataproc/src/test/java/io/cdap/cdap/runtime/spi/common/DataprocUtilsTest.java b/cdap-runtime-ext-dataproc/src/test/java/io/cdap/cdap/runtime/spi/common/DataprocUtilsTest.java index a7dfe27d7379..15d34efcdbcc 100644 --- a/cdap-runtime-ext-dataproc/src/test/java/io/cdap/cdap/runtime/spi/common/DataprocUtilsTest.java +++ b/cdap-runtime-ext-dataproc/src/test/java/io/cdap/cdap/runtime/spi/common/DataprocUtilsTest.java @@ -30,7 +30,7 @@ public class DataprocUtilsTest { public void testParseSingleLabel() { Map expected = new HashMap<>(); expected.put("key", "val"); - Assert.assertEquals(expected, DataprocUtils.parseKeyValueConfig("key=val", ",", "=")); + Assert.assertEquals(expected, DataprocUtils.parseLabels("key=val", ",", "=")); } @Test @@ -38,7 +38,8 @@ public void testParseMultipleLabels() { Map expected = new HashMap<>(); expected.put("k1", "v1"); expected.put("k2", "v2"); - Assert.assertEquals(expected, DataprocUtils.parseKeyValueConfig("k1=v1,k2=v2", ",", "=")); + Assert.assertEquals(expected, + DataprocUtils.parseLabels("k1=v1,k2=v2", ",", "=")); } @Test @@ -46,7 +47,8 @@ public void testParseLabelsIgnoresWhitespace() { Map expected = new HashMap<>(); expected.put("k1", "v1"); expected.put("k2", "v2"); - Assert.assertEquals(expected, DataprocUtils.parseKeyValueConfig(" k1 =\tv1 ,\nk2 = v2 ", ",", "=")); + Assert.assertEquals(expected, + DataprocUtils.parseLabels(" k1 =\tv1 ,\nk2 = v2 ", ",", "=")); } @Test @@ -54,6 +56,32 @@ public void testParseLabelsWithoutVal() { Map expected = new HashMap<>(); expected.put("k1", ""); expected.put("k2", ""); - Assert.assertEquals(expected, DataprocUtils.parseKeyValueConfig("k1,k2=", ",", "=")); + Assert.assertEquals(expected, DataprocUtils.parseLabels("k1,k2=", ",", "=")); + } + + @Test + public void testParseLabelsIgnoresInvalidKey() { + Map expected = new HashMap<>(); + Assert.assertEquals(expected, DataprocUtils.parseLabels(("A"), ",", "=")); + Assert.assertEquals(expected, DataprocUtils.parseLabels(("0"), ",", "=")); + Assert.assertEquals(expected, DataprocUtils.parseLabels(("a.b"), ",", "=")); + Assert.assertEquals(expected, DataprocUtils.parseLabels(("a^b"), ",", "=")); + StringBuilder longStr = new StringBuilder(); + for (int i = 0; i < 64; i++) { + longStr.append('a'); + } + Assert.assertEquals(expected, DataprocUtils.parseLabels(longStr.toString(), ",", "=")); + } + + @Test + public void testParseLabelsIgnoresInvalidVal() { + Map expected = new HashMap<>(); + Assert.assertEquals(expected, DataprocUtils.parseLabels("a=A", ",", "=")); + Assert.assertEquals(expected, DataprocUtils.parseLabels("a=ab.c", ",", "=")); + StringBuilder longStr = new StringBuilder(); + for (int i = 0; i < 64; i++) { + longStr.append('a'); + } + Assert.assertEquals(expected, DataprocUtils.parseLabels(String.format("a=%s", longStr.toString()), ",", "=")); } } diff --git a/cdap-runtime-ext-dataproc/src/test/java/io/cdap/cdap/runtime/spi/provisioner/dataproc/DataprocProvisionerTest.java b/cdap-runtime-ext-dataproc/src/test/java/io/cdap/cdap/runtime/spi/provisioner/dataproc/DataprocProvisionerTest.java index 92c37dc85558..e60fb35393a7 100644 --- a/cdap-runtime-ext-dataproc/src/test/java/io/cdap/cdap/runtime/spi/provisioner/dataproc/DataprocProvisionerTest.java +++ b/cdap-runtime-ext-dataproc/src/test/java/io/cdap/cdap/runtime/spi/provisioner/dataproc/DataprocProvisionerTest.java @@ -486,12 +486,19 @@ public void testCommonDataprocLabels() { context.addProperty("labels", "user|cdap;program|data-pipeline"); Map labels = provisioner.getCommonDataprocLabels(context); // Check that the value from system context has overwritten the provisioner context. - Assert.assertEquals(labels.get("user"), "system"); + Assert.assertEquals("system", labels.get("user")); // Check that the CDAP version is set by system context. - Assert.assertEquals(labels.get("cdap-version"), "6_4"); + Assert.assertEquals("6_4", labels.get("cdap-version")); // Check that the non-overwritten value from user context is present. - Assert.assertEquals(labels.get("program"), "data-pipeline"); + Assert.assertEquals("data-pipeline", labels.get("program")); // Check that the unique value from system context is present. - Assert.assertEquals(labels.get("system-prop"), "value"); + Assert.assertEquals("value", labels.get("system-prop")); + } + + @Test + public void testCommonDataprocLabelsInvalidLabel() { + context.addProperty("labels", "email|user@cdapio"); + Map labels = provisioner.getCommonDataprocLabels(context); + Assert.assertNull(labels.get("email")); } }