diff --git a/parquet-hadoop/README.md b/parquet-hadoop/README.md index 6f2373b9ec..d235c618ce 100644 --- a/parquet-hadoop/README.md +++ b/parquet-hadoop/README.md @@ -408,7 +408,10 @@ ParquetInputFormat to materialize records. It should be a the descendant class o ## Class: PropertiesDrivenCryptoFactory **Property:** `parquet.encryption.column.keys` -**Description:** List of columns to encrypt, with master key IDs (see HIVE-21848).Format: `<masterKeyID>:<colName>,<colName>;<masterKeyID>:<colName>...`. Note: nested column names must be specified as full dot-separated paths for each leaf column. +**Description:** List of columns to encrypt, with master key IDs (see HIVE-21848). +Format: `<masterKeyID>:<colName>,<colName>;<masterKeyID>:<colName>...`. +Unlisted columns are not encrypted. +Note: nested column names must be specified as full dot-separated paths for each leaf column. **Default value:** None. --- @@ -419,6 +422,12 @@ ParquetInputFormat to materialize records. It should be a the descendant class o --- +**Property:** `parquet.encryption.complete.columns` +**Description:** Complete column encryption - if set to `true`, unlisted columns are encrypted (using the footer master key). +**Default value:** `false` + +--- + **Property:** `parquet.encryption.uniform.key` **Description:** Master key ID for uniform encryption of all columns and footer. If set, `column.keys` and `footer.key` parameters should not be used. **Default value:** None. diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java index 817ab4d961..274b8eae67 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java @@ -49,13 +49,19 @@ public class PropertiesDrivenCryptoFactory implements EncryptionPropertiesFactor /** * List of columns to encrypt, with master key IDs (see HIVE-21848). 
- * Format: "masterKeyID:colName,colName;masterKeyID:colName..." + * Format: "masterKeyID:colName,colName;masterKeyID:colName...". + * Unlisted columns are not encrypted. */ public static final String COLUMN_KEYS_PROPERTY_NAME = "parquet.encryption.column.keys"; /** * Master key ID for footer encryption/signing. */ public static final String FOOTER_KEY_PROPERTY_NAME = "parquet.encryption.footer.key"; + /** + * Encrypt unlisted columns using footer key. + * By default, false - unlisted columns are not encrypted. + */ + public static final String COMPLETE_COLUMN_ENCRYPTION_PROPERTY_NAME = "parquet.encryption.complete.columns"; /** * Master key ID for uniform encryption (same key for all columns and footer). */ @@ -72,6 +78,7 @@ public class PropertiesDrivenCryptoFactory implements EncryptionPropertiesFactor public static final String ENCRYPTION_ALGORITHM_DEFAULT = ParquetCipher.AES_GCM_V1.toString(); public static final boolean PLAINTEXT_FOOTER_DEFAULT = false; + public static final boolean COMPLETE_COLUMN_ENCRYPTION_DEFAULT = false; private static final SecureRandom RANDOM = new SecureRandom(); @@ -82,6 +89,9 @@ public FileEncryptionProperties getFileEncryptionProperties(Configuration fileHa String footerKeyId = fileHadoopConfig.getTrimmed(FOOTER_KEY_PROPERTY_NAME); String columnKeysStr = fileHadoopConfig.getTrimmed(COLUMN_KEYS_PROPERTY_NAME); String uniformKeyId = fileHadoopConfig.getTrimmed(UNIFORM_KEY_PROPERTY_NAME); + boolean completeColumnEncryption = fileHadoopConfig.getBoolean(COMPLETE_COLUMN_ENCRYPTION_PROPERTY_NAME, + COMPLETE_COLUMN_ENCRYPTION_DEFAULT); + boolean emptyFooterKeyId = stringIsEmpty(footerKeyId); boolean emptyColumnKeyIds = stringIsEmpty(columnKeysStr); @@ -111,6 +121,9 @@ public FileEncryptionProperties getFileEncryptionProperties(Configuration fileHa throw new ParquetCryptoRuntimeException("Uniform encryption. 
Cant have column keys configured in " + COLUMN_KEYS_PROPERTY_NAME); } + if (completeColumnEncryption) { + throw new ParquetCryptoRuntimeException("Complete column encryption cant be applied in uniform encryption mode"); + } // Now assign footer key id to uniform key id footerKeyId = uniformKeyId; @@ -164,6 +177,10 @@ public FileEncryptionProperties getFileEncryptionProperties(Configuration fileHa Map encryptedColumns = getColumnEncryptionProperties(dekLength, columnKeysStr, keyWrapper); propertiesBuilder = propertiesBuilder.withEncryptedColumns(encryptedColumns); + + if (completeColumnEncryption) { + propertiesBuilder = propertiesBuilder.withCompleteColumnEncryption(); + } } if (plaintextFooter) { diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java b/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java index 0c5a7cc5d8..2384b48c49 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java @@ -99,6 +99,9 @@ * - plaintext footer mode. * - ENCRYPT_COLUMNS_AND_FOOTER_CTR: Encrypt two columns and the footer, with different * keys. Use the alternative (AES_GCM_CTR_V1) algorithm. + * - COMPLETE_COLUMN_ENCRYPTION: Encrypt two columns and the footer, with different + * keys. Encrypt other columns with the footer key. + * - UNIFORM_ENCRYPTION: Encrypt all columns and footer with the same master key. * - NO_ENCRYPTION: Do not encrypt anything * * @@ -270,6 +273,18 @@ public Configuration getHadoopConfiguration(TestPropertiesDrivenEncryption test) return conf; } }, + COMPLETE_COLUMN_ENCRYPTION { + /** + * Encrypt two columns and the footer, with different master keys. + * Encrypt other columns with the footer master key. 
+ */ + public Configuration getHadoopConfiguration(TestPropertiesDrivenEncryption test) { + Configuration conf = getCryptoProperties(test); + setColumnAndFooterKeys(conf); + conf.setBoolean(PropertiesDrivenCryptoFactory.COMPLETE_COLUMN_ENCRYPTION_PROPERTY_NAME, true); + return conf; + } + }, NO_ENCRYPTION { /** * Do not encrypt anything