Skip to content

Commit

Permalink
optimize alter hive table column validation
Browse files Browse the repository at this point in the history
  • Loading branch information
mchades committed Oct 19, 2023
1 parent fe2ec48 commit d86cc57
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -452,34 +452,65 @@ private void validatePartitionForCreate(Column[] columns, Transform[] partitions
}
}

private void validatePartitionForAlter(
private void validateColumnChangeForAlter(
TableChange[] changes, org.apache.hadoop.hive.metastore.api.Table hiveTable) {
Set<String> partitionFields =
hiveTable.getPartitionKeys().stream().map(FieldSchema::getName).collect(Collectors.toSet());
Set<String> existingFields =
hiveTable.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toSet());
existingFields.addAll(partitionFields);

Arrays.stream(changes)
.filter(c -> c instanceof TableChange.ColumnChange)
.forEach(
c -> {
String changeColumn = ((TableChange.ColumnChange) c).fieldNames()[0];
String fieldToAdd = String.join(".", ((TableChange.ColumnChange) c).fieldNames());
Preconditions.checkArgument(
c instanceof TableChange.UpdateColumnComment
|| !partitionFields.contains(changeColumn),
"Cannot alter partition column: " + changeColumn);
|| !partitionFields.contains(fieldToAdd),
"Cannot alter partition column: " + fieldToAdd);

if (c instanceof TableChange.UpdateColumnType) {
validateColumnType(fieldToAdd, ((TableChange.UpdateColumnType) c).getNewDataType());
}

if (c instanceof TableChange.UpdateColumnPosition
&& ((TableChange.UpdateColumnPosition) c).getPosition()
instanceof TableChange.After) {
String afterColumn =
((TableChange.After) ((TableChange.UpdateColumnPosition) c).getPosition())
.getColumn();
if (partitionFields.contains(afterColumn)) {
&& afterPartitionColumn(
partitionFields, ((TableChange.UpdateColumnPosition) c).getPosition())) {
throw new IllegalArgumentException(
"Cannot alter column position to after partition column");
}

if (c instanceof TableChange.AddColumn) {
TableChange.AddColumn addColumn = (TableChange.AddColumn) c;

validateColumnType(fieldToAdd, addColumn.getDataType());

if ((addColumn.getPosition() == null && !partitionFields.isEmpty())
|| (afterPartitionColumn(partitionFields, addColumn.getPosition()))) {
throw new IllegalArgumentException("Cannot add column after partition column");
}

if (existingFields.contains(fieldToAdd)) {
throw new IllegalArgumentException(
"Cannot alter column position to after partition column: " + afterColumn);
"Cannot add column with duplicate name: " + fieldToAdd);
} else {
existingFields.add(fieldToAdd);
}
}
});
}

/**
 * Checks whether the given column position would place a column after one of the
 * table's partition columns.
 *
 * @param partitionFields names of the table's partition columns
 * @param columnPosition the requested position; must not be null
 * @return true only when the position is an {@code After} referencing a partition column
 * @throws IllegalArgumentException if {@code columnPosition} is null
 */
private boolean afterPartitionColumn(
    Set<String> partitionFields, TableChange.ColumnPosition columnPosition) {
  Preconditions.checkArgument(columnPosition != null, "Column position cannot be null");

  // Only an `After` position can reference another column; every other position
  // kind can never land behind a partition column.
  if (!(columnPosition instanceof TableChange.After)) {
    return false;
  }
  String referencedColumn = ((TableChange.After) columnPosition).getColumn();
  return partitionFields.contains(referencedColumn);
}

private void validateDistributionAndSort(Distribution distribution, SortOrder[] sortOrder) {
if (distribution != Distribution.NONE) {
boolean allNameReference =
Expand Down Expand Up @@ -597,7 +628,7 @@ public Table alterTable(NameIdentifier tableIdent, TableChange... changes)
org.apache.hadoop.hive.metastore.api.Table alteredHiveTable =
table.toHiveTable(tablePropertiesMetadata);

validatePartitionForAlter(changes, alteredHiveTable);
validateColumnChangeForAlter(changes, alteredHiveTable);

for (TableChange change : changes) {
// Table change
Expand All @@ -619,9 +650,7 @@ public Table alterTable(NameIdentifier tableIdent, TableChange... changes)
List<FieldSchema> cols = sd.getCols();

if (change instanceof TableChange.AddColumn) {
TableChange.AddColumn addColumn = (TableChange.AddColumn) change;
validateColumnType(String.join(".", addColumn.fieldNames()), addColumn.getDataType());
doAddColumn(cols, addColumn);
doAddColumn(cols, (TableChange.AddColumn) change);

} else if (change instanceof TableChange.DeleteColumn) {
doDeleteColumn(cols, (TableChange.DeleteColumn) change);
Expand All @@ -636,10 +665,7 @@ public Table alterTable(NameIdentifier tableIdent, TableChange... changes)
doUpdateColumnPosition(cols, (TableChange.UpdateColumnPosition) change);

} else if (change instanceof TableChange.UpdateColumnType) {
TableChange.UpdateColumnType updateColumnType = (TableChange.UpdateColumnType) change;
validateColumnType(
String.join(".", updateColumnType.fieldNames()), updateColumnType.getNewDataType());
doUpdateColumnType(cols, updateColumnType);
doUpdateColumnType(cols, (TableChange.UpdateColumnType) change);

} else {
throw new IllegalArgumentException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ public void testAlterHiveTable() {
columns,
HIVE_COMMENT,
properties,
new Transform[0],
new Transform[] {identity(col2)},
distribution,
sortOrders);
Assertions.assertTrue(hiveCatalog.asTableCatalog().tableExists(tableIdentifier));
Expand Down Expand Up @@ -465,7 +465,7 @@ public void testAlterHiveTable() {
.asTableCatalog()
.alterTable(
tableIdentifier,
TableChange.addColumn(new String[] {"col_1"}, TypeCreator.REQUIRED.I8)));
TableChange.addColumn(new String[] {"col_3"}, TypeCreator.REQUIRED.I8)));
Assertions.assertTrue(
exception
.getMessage()
Expand All @@ -490,6 +490,33 @@ public void testAlterHiveTable() {
"The NOT NULL constraint for column is only supported since Hive 3.0, "
+ "but the current Gravitino Hive catalog only supports Hive 2.x"));

exception =
Assertions.assertThrows(
IllegalArgumentException.class,
() ->
hiveCatalog
.asTableCatalog()
.alterTable(
tableIdentifier,
TableChange.addColumn(new String[] {"col_1"}, TypeCreator.NULLABLE.I8)));
Assertions.assertTrue(
exception.getMessage().contains("Cannot add column after partition column"));

exception =
Assertions.assertThrows(
IllegalArgumentException.class,
() ->
hiveCatalog
.asTableCatalog()
.alterTable(
tableIdentifier,
TableChange.addColumn(
new String[] {col1.name()},
TypeCreator.NULLABLE.I8,
"comment",
TableChange.ColumnPosition.after(col1.name()))));
Assertions.assertTrue(exception.getMessage().contains("Cannot add column with duplicate name"));

// test alter
hiveCatalog
.asTableCatalog()
Expand All @@ -500,19 +527,23 @@ public void testAlterHiveTable() {
TableChange.removeProperty("key1"),
TableChange.setProperty("key2", "val2_new"),
// columns current format: [col_1:I8:comment, col_2:DATE:comment]
TableChange.addColumn(new String[] {"col_3"}, TypeCreator.NULLABLE.STRING),
// columns current format: [col_1:I8:comment, col_2:DATE:comment, col_3:STRING:null]
TableChange.renameColumn(new String[] {"col_2"}, "col_2_new"),
// columns current format: [col_1:I8:comment, col_2_new:DATE:comment, col_3:STRING:null]
TableChange.addColumn(
new String[] {"col_3"},
TypeCreator.NULLABLE.STRING,
null,
TableChange.ColumnPosition.after(col1.name())),
// columns current format: [col_1:I8:comment, col_3:STRING:null, col_2:DATE:comment]
TableChange.renameColumn(new String[] {"col_3"}, "col_3_new"),
// columns current format: [col_1:I8:comment, col_3_new:STRING:null, col_2:DATE:comment]
TableChange.updateColumnComment(new String[] {"col_1"}, HIVE_COMMENT + "_new"),
// columns current format: [col_1:I8:comment_new, col_2_new:DATE:comment,
// col_3:STRING:null]
// columns current format: [col_1:I8:comment_new, col_3_new:STRING:null,
// col_2:DATE:comment]
TableChange.updateColumnType(new String[] {"col_1"}, TypeCreator.NULLABLE.I32),
// columns current format: [col_1:I32:comment_new, col_2_new:DATE:comment,
// col_3:STRING:null]
// columns current format: [col_1:I32:comment_new, col_3_new:STRING:null,
// col_2:DATE:comment]
TableChange.updateColumnPosition(
new String[] {"col_2_new"}, TableChange.ColumnPosition.first())
// columns current: [col_2_new:DATE:comment, col_1:I32:comment_new, col_3:STRING:null]
new String[] {"col_3_new"}, TableChange.ColumnPosition.first())
// columns current: [col_3_new:STRING:null, col_1:I32:comment_new, col_2:DATE:comment]
);
Table alteredTable =
hiveCatalog
Expand All @@ -532,19 +563,19 @@ public void testAlterHiveTable() {
Column[] expected =
new Column[] {
new HiveColumn.Builder()
.withName("col_2_new")
.withType(TypeCreator.NULLABLE.DATE)
.withComment(HIVE_COMMENT)
.withName("col_3_new")
.withType(TypeCreator.NULLABLE.STRING)
.withComment(null)
.build(),
new HiveColumn.Builder()
.withName("col_1")
.withType(TypeCreator.NULLABLE.I32)
.withComment(HIVE_COMMENT + "_new")
.build(),
new HiveColumn.Builder()
.withName("col_3")
.withType(TypeCreator.NULLABLE.STRING)
.withComment(null)
.withName("col_2")
.withType(TypeCreator.NULLABLE.DATE)
.withComment(HIVE_COMMENT)
.build()
};
Assertions.assertArrayEquals(expected, alteredTable.columns());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,11 @@ public void testAlterHiveTable() throws TException, InterruptedException {
TableChange.updateComment(table_comment + "_new"),
TableChange.removeProperty("key1"),
TableChange.setProperty("key2", "val2_new"),
TableChange.addColumn(new String[] {"col_4"}, TypeCreator.NULLABLE.STRING),
TableChange.addColumn(
new String[] {"col_4"},
TypeCreator.NULLABLE.STRING,
null,
TableChange.ColumnPosition.after(columns[1].name())),
TableChange.renameColumn(new String[] {HIVE_COL_NAME2}, "col_2_new"),
TableChange.updateColumnComment(new String[] {HIVE_COL_NAME1}, "comment_new"),
TableChange.updateColumnType(
Expand Down

0 comments on commit d86cc57

Please sign in to comment.