Skip to content

Commit

Permalink
DAT-18790: added Default values for snapshots and related functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
Mykhailo Savchenko committed Oct 24, 2024
1 parent 0fe6e8a commit d439dc3
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,29 @@
import liquibase.snapshot.DatabaseSnapshot;
import liquibase.snapshot.SnapshotGenerator;
import liquibase.snapshot.jvm.ColumnSnapshotGenerator;
import liquibase.statement.DatabaseFunction;
import liquibase.statement.core.RawParameterizedSqlStatement;
import liquibase.structure.DatabaseObject;
import liquibase.structure.core.Column;
import liquibase.structure.core.DataType;

import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ColumnSnapshotGeneratorDatabricks extends ColumnSnapshotGenerator {

private static final String ALL_DATA_TYPES = " BIGINT | BINARY | BOOLEAN | DATE | DECIMAL| DECIMAL\\(| DOUBLE | FLOAT | INT | INTERVAL| VOID | SMALLINT | STRING | VARCHAR\\(\\d+\\) | TIMESTAMP | TIMESTAMP_NTZ | TINYINT | ARRAY<| MAP<| STRUCT<| VARIANT| OBJECT<";
private static final String GENERATED_BY_DEFAULT_REGEX = "(?i)\\s+GENERATED\\s+BY\\s+DEFAULT\\s+AS\\s+IDENTITY";
private static final String DEFAULT_VALUE_SNIPPET_REGEX = "DEFAULT\\s+(.*?)(?i)(\\s+COMMENT\\s+'| PRIMARY\\s+KEY | FOREIGN\\s+KEY | MASK\\s+\\w+|$|,|,\\s+\\w+(" + ALL_DATA_TYPES + "|\\)$)?)";
private static final String SANITIZE_TABLE_SPECIFICATION_REGEX = "(\\(.*?\\))\\s*(?i)(USING|OPTIONS|PARTITIONED BY|CLUSTER BY|LOCATION|COMMENT|TBLPROPERTIES|WITH|$|;$)";
private static final Pattern DEFAULT_VALUE_PATTERN = Pattern.compile(DEFAULT_VALUE_SNIPPET_REGEX);
private static final Pattern SANITIZE_TABLE_SPECIFICATION_PATTERN = Pattern.compile(SANITIZE_TABLE_SPECIFICATION_REGEX);

@Override
public int getPriority(Class<? extends DatabaseObject> objectType, Database database) {
if (database instanceof DatabricksDatabase)
return PRIORITY_DATABASE;
return PRIORITY_NONE;

}

@Override
Expand Down Expand Up @@ -55,22 +62,63 @@ protected DatabaseObject snapshotObject(DatabaseObject example, DatabaseSnapshot
//This should work after fix on Databricks side
if (example instanceof Column) {
Column column = (Column) super.snapshotObject(example, snapshot);
Database database = snapshot.getDatabase();

String query = String.format("SELECT column_default from %s.COLUMNS where table_name = '%s' AND table_schema='%s' AND column_name ='%s';",
database.getSystemSchema(),
column.getRelation().getName(),
column.getRelation().getSchema().getName(),
column.getName());
List<Map<String, ?>> tablePropertiesResponse = Scope.getCurrentScope().getSingleton(ExecutorService.class)
.getExecutor("jdbc", database).queryForList(new RawParameterizedSqlStatement(query));
for (Map<String, ?> tableProperty : tablePropertiesResponse) {
column.setDefaultValue(tableProperty.get("COLUMN_DEFAULT"));
//These two are used too often, avoiding them? otherwise there would be too much DB calls
if(!column.getRelation().getName().equalsIgnoreCase("databasechangelog")
&& !column.getRelation().getName().equalsIgnoreCase("databasechangeloglock")) {
Database database = snapshot.getDatabase();
String query = String.format("SHOW CREATE TABLE %s.%s.%s;",
column.getRelation().getSchema().getCatalog(),
column.getRelation().getSchema().getName(),
column.getRelation().getName());
String showCreateTableResponse = Scope.getCurrentScope().getSingleton(ExecutorService.class)
.getExecutor("jdbc", database).queryForObject(new RawParameterizedSqlStatement(query), String.class);
String defaultValue = extractDefaultValue(showCreateTableResponse, column.getName());
if(defaultValue != null) {
Pattern functionPattern = Pattern.compile("^(\\w+)\\(.*\\)");
Matcher functionMatcher = functionPattern.matcher(defaultValue);
if(functionMatcher.find()) {
DatabaseFunction function = new DatabaseFunction(defaultValue);
column.setDefaultValue(function);
} else {
column.setDefaultValue(defaultValue);
}
}
}
return column;
} else {
return example;
}
}

}
private String extractDefaultValue(String createTableStatement, String columnName) {
String defaultValue = null;
String sanitizedCreateTableStatement = sanitizeStatement(createTableStatement);
Pattern columnWithPotentialDefaultPattern = Pattern.compile("[\\(|,]\\s*(" + columnName + "\\s*\\b\\w*\\b.*?)\\s*(?i)(" + ALL_DATA_TYPES + "|( CONSTRAINT |\\)$))");
Matcher columnWithPotentialDefaultMatcher = columnWithPotentialDefaultPattern.matcher(sanitizedCreateTableStatement);

String columnWithPotentialDefault = "";
if(columnWithPotentialDefaultMatcher.find()) {
columnWithPotentialDefault = columnWithPotentialDefaultMatcher.group(1);
Matcher stringColumnMatcher = Pattern.compile(columnName + "\\s+(?i)(VARCHAR\\(\\d+\\)|STRING )").matcher(sanitizedCreateTableStatement);
Matcher defaultStringMatcher = Pattern.compile(columnName + ".+?DEFAULT\\s+(\\'|\\\")(.*?)\\1").matcher(sanitizedCreateTableStatement);
Matcher defaultValueMatcher = DEFAULT_VALUE_PATTERN.matcher(columnWithPotentialDefault);
if(defaultValueMatcher.find()) {
defaultValue = defaultValueMatcher.group(1);
if(stringColumnMatcher.find() && defaultStringMatcher.find()) {
defaultValue = defaultStringMatcher.group(2);
}
}
}
return defaultValue;
}

private String sanitizeStatement(String createTableStatement) {
createTableStatement = createTableStatement.replace("\n", "");
String sanitizedCreateTableStatement = createTableStatement.replaceAll(GENERATED_BY_DEFAULT_REGEX, " ");
Matcher tableSpecificationMatcher = SANITIZE_TABLE_SPECIFICATION_PATTERN.matcher(sanitizedCreateTableStatement);
if(tableSpecificationMatcher.find()) {
sanitizedCreateTableStatement = tableSpecificationMatcher.group(1);
}
return sanitizedCreateTableStatement;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@
<changeSet author="fl" id="1">
<createTable tableName="tableWithDefaultValues" >
<column name="longcolumn" type="long" autoIncrement="true" generationType="IDENTITY" />
<column name="eventTime" type="timestamp"/>
<column name="year" type="int" defaultValueComputed="GENERATED ALWAYS AS (YEAR(eventTime))"/>
<column name="eventDate" type="date" defaultValueComputed="GENERATED ALWAYS AS (CAST(eventTime AS DATE))" />
<column name="eventDescription" type="string">
<column name="eventTime" type="timestamp" defaultValueComputed="CURRENT_TIMESTAMP()">
<constraints nullable="false" />
</column>
<column name="eventShortDescription" type="string" defaultValueComputed="GENERATED ALWAYS AS (SUBSTRING(eventDescription, 0, 1))" />
<column name="year" type="int" defaultValueComputed="YEAR(CURRENT_TIMESTAMP())"/>
<column name="eventDate" type="date" defaultValueComputed="CAST(CURRENT_TIMESTAMP() AS DATE)" />
<column name="eventDescription" type="string" defaultValue="default string, regular ? almost();!$#@^%[] String bigint">
<constraints nullable="false" />
</column>
<column name="eventShortDescription" type="string" defaultValue="short desc" />
</createTable>
</changeSet>

Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
CREATE TABLE main.liquibase_harness_test_ds.tableWithDefaultValues (longcolumn LONG GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 1), eventTime TIMESTAMP, year INT GENERATED ALWAYS AS (YEAR(eventTime)), eventDate date GENERATED ALWAYS AS (CAST(eventTime AS DATE)), eventDescription STRING NOT NULL, eventShortDescription STRING GENERATED ALWAYS AS (SUBSTRING(eventDescription, 0, 1))) USING delta TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'supported', 'delta.columnMapping.mode' = 'name', 'delta.enableDeletionVectors' = true)
CREATE TABLE main.liquibase_harness_test_ds.tableWithDefaultValues (longcolumn LONG GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 1), eventTime TIMESTAMP DEFAULT current_timestamp() NOT NULL, year INT DEFAULT YEAR(CURRENT_TIMESTAMP()), eventDate date DEFAULT CAST(CURRENT_TIMESTAMP() AS DATE), eventDescription STRING DEFAULT 'default string, regular ? almost();!$#@^%[] String bigint' NOT NULL, eventShortDescription STRING DEFAULT 'short desc') USING delta TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'supported', 'delta.columnMapping.mode' = 'name', 'delta.enableDeletionVectors' = true)

0 comments on commit d439dc3

Please sign in to comment.