Skip to content

Commit

Permalink
[refactor](statistic) fetch statistic data with catalog and database …
Browse files Browse the repository at this point in the history
…id (apache#33928)

Previously, all statistic tables had a column named `id`.
The value of this column is composed from `tableId` and `indexId`.
Because a table's id is globally unique, we could query a statistic table by `id` alone
to fetch the table's statistic data.

But for an external table, the table's id may not be globally unique. For example, the table id
may only be unique within a database. So we need more predicates to fetch the right statistic data.

In this PR, I changed all query statements on the statistic tables by adding `catalog_id` and `db_id` conditions.
For example:

```
SELECT * FROM 
column_statistics
WHERE `id` = '${id}' AND `catalog_id` = '${catalogId}' AND `db_id` = '${dbId}'
```

The content of the `id` column in the statistic tables remains unchanged, so that after users upgrade Doris,
the existing statistic data can still be used.
  • Loading branch information
morningman committed Apr 27, 2024
1 parent c998e2f commit 0888611
Show file tree
Hide file tree
Showing 18 changed files with 152 additions and 96 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ public void setName(String newName) {
name = newName;
}

void setQualifiedDbName(String qualifiedDbName) {
public void setQualifiedDbName(String qualifiedDbName) {
this.qualifiedDbName = qualifiedDbName;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ public enum ErrorCode {
ERR_VIEW_NO_EXPLAIN(1345, new byte[]{'H', 'Y', '0', '0', '0'}, "EXPLAIN/SHOW can not be issued; lacking "
+ "privileges for underlying table"),
ERR_FRM_UNKNOWN_TYPE(1346, new byte[]{'H', 'Y', '0', '0', '0'}, "File '%s' has unknown type '%s' in its header"),
ERR_WRONG_OBJECT(1347, new byte[]{'H', 'Y', '0', '0', '0'}, "'%s.%s' is not %s"),
ERR_WRONG_OBJECT(1347, new byte[]{'H', 'Y', '0', '0', '0'}, "'%s.%s' is not %s. %s."),
ERR_NONUPDATEABLE_COLUMN(1348, new byte[]{'H', 'Y', '0', '0', '0'}, "Column '%s' is not updatable"),
ERR_VIEW_SELECT_DERIVED(1349, new byte[]{'H', 'Y', '0', '0', '0'}, "View's SELECT contains a subquery in the FROM"
+ " clause"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -865,18 +865,22 @@ public void dropTable(DropTableStmt stmt) throws DdlException {
// Check if a view
if (stmt.isView()) {
if (!(table instanceof View)) {
ErrorReport.reportDdlException(ErrorCode.ERR_WRONG_OBJECT, dbName, tableName, "VIEW");
ErrorReport.reportDdlException(ErrorCode.ERR_WRONG_OBJECT, dbName, tableName, "VIEW",
genDropHint(table));
}
} else {
if (table instanceof View) {
ErrorReport.reportDdlException(ErrorCode.ERR_WRONG_OBJECT, dbName, tableName, "TABLE");
ErrorReport.reportDdlException(ErrorCode.ERR_WRONG_OBJECT, dbName, tableName, "TABLE",
genDropHint(table));
}
}

if (!stmt.isMaterializedView() && table instanceof MTMV) {
ErrorReport.reportDdlException(ErrorCode.ERR_WRONG_OBJECT, dbName, tableName, "TABLE");
ErrorReport.reportDdlException(ErrorCode.ERR_WRONG_OBJECT, dbName, tableName, "TABLE",
genDropHint(table));
} else if (stmt.isMaterializedView() && !(table instanceof MTMV)) {
ErrorReport.reportDdlException(ErrorCode.ERR_WRONG_OBJECT, dbName, tableName, "MTMV");
ErrorReport.reportDdlException(ErrorCode.ERR_WRONG_OBJECT, dbName, tableName, "MTMV",
genDropHint(table));
}

if (!stmt.isForceDrop()) {
Expand Down Expand Up @@ -939,6 +943,18 @@ public void dropTable(DropTableStmt stmt) throws DdlException {
tableName, dbName, stmt.isForceDrop(), costTimes);
}

/**
 * Builds the hint passed as the last argument of an {@code ERR_WRONG_OBJECT} report,
 * suggesting the DROP statement that matches the actual kind of {@code table}.
 *
 * @param table the table that was targeted by the wrong kind of DROP statement
 * @return a hint of the form {@code Use 'DROP <kind> <name>'}; {@code <kind>} is empty
 *         when the table is none of View, MTMV or OlapTable
 */
private static String genDropHint(TableIf table) {
    String type = "";
    if (table instanceof View) {
        type = "VIEW";
    } else if (table instanceof MTMV) {
        // Checked before OlapTable on purpose.
        // NOTE(review): MTMV is presumably a subclass of OlapTable; if so, testing
        // OlapTable first would make this branch unreachable and produce a
        // "DROP TABLE" hint for a materialized view. Confirm against the class hierarchy.
        type = "MATERIALIZED VIEW";
    } else if (table instanceof OlapTable) {
        type = "TABLE";
    }
    // Close the single quote opened before DROP so the hint reads as one quoted statement.
    return "Use 'DROP " + type + " " + table.getName() + "'";
}

public boolean unprotectDropTable(Database db, Table table, boolean isForceDrop, boolean isReplay,
long recycleTime) {
if (table.getType() == TableType.ELASTICSEARCH) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,8 @@ private static ColumnStatistic getColumnStatistic(TableIf table, String colName)
}

private static Histogram getColumnHistogram(TableIf table, String colName) {
return Env.getCurrentEnv().getStatisticsCache().getHistogram(table.getId(), colName);
return Env.getCurrentEnv().getStatisticsCache().getHistogram(
table.getDatabase().getCatalog().getId(), table.getDatabase().getId(), table.getId(), colName);
}

/**
Expand Down
11 changes: 7 additions & 4 deletions fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
Original file line number Diff line number Diff line change
Expand Up @@ -1046,7 +1046,7 @@ private void handleShowCreateTable() throws AnalysisException {
} else {
if (showStmt.isView()) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_OBJECT, showStmt.getDb(),
showStmt.getTable(), "VIEW");
showStmt.getTable(), "VIEW", "Use 'SHOW CREATE TABLE '" + table.getName());
}
rows.add(Lists.newArrayList(table.getName(), createTableStmt.get(0)));
resultSet = table.getType() != TableType.MATERIALIZED_VIEW
Expand Down Expand Up @@ -2509,8 +2509,9 @@ private void handleShowColumnStats() throws AnalysisException {
}

private void getStatsForAllColumns(List<Pair<Pair<String, String>, ColumnStatistic>> columnStatistics,
TableIf tableIf) throws AnalysisException {
List<ResultRow> resultRows = StatisticsRepository.queryColumnStatisticsForTable(tableIf.getId());
TableIf tableIf) {
List<ResultRow> resultRows = StatisticsRepository.queryColumnStatisticsForTable(
tableIf.getDatabase().getCatalog().getId(), tableIf.getDatabase().getId(), tableIf.getId());
// row[4] is index id, row[5] is column name.
for (ResultRow row : resultRows) {
String indexName = tableIf.getName();
Expand Down Expand Up @@ -2555,7 +2556,9 @@ private void getStatsForSpecifiedColumns(List<Pair<Pair<String, String>, ColumnS
columnStatistics.add(Pair.of(Pair.of(indexName, colName), columnStatistic));
} else if (partitionNames == null) {
ColumnStatistic columnStatistic =
StatisticsRepository.queryColumnStatisticsByName(tableIf.getId(), indexId, colName);
StatisticsRepository.queryColumnStatisticsByName(
tableIf.getDatabase().getCatalog().getId(),
tableIf.getDatabase().getId(), tableIf.getId(), indexId, colName);
columnStatistics.add(Pair.of(Pair.of(indexName, colName), columnStatistic));
} else {
String finalIndexName = indexName;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3296,9 +3296,10 @@ public TStatus updateStatsCache(TUpdateFollowerStatsCacheRequest request) throws
ColStatsData data = GsonUtils.GSON.fromJson(request.colStatsData, ColStatsData.class);
ColumnStatistic c = data.toColumnStatistic();
if (c == ColumnStatistic.UNKNOWN) {
Env.getCurrentEnv().getStatisticsCache().invalidate(k.tableId, k.idxId, k.colName);
Env.getCurrentEnv().getStatisticsCache().invalidate(k.catalogId, k.dbId, k.tableId, k.idxId, k.colName);
} else {
Env.getCurrentEnv().getStatisticsCache().updateColStatsCache(k.tableId, k.idxId, k.colName, c);
Env.getCurrentEnv().getStatisticsCache().updateColStatsCache(
k.catalogId, k.dbId, k.tableId, k.idxId, k.colName, c);
}
// Return Ok anyway
return new TStatus(TStatusCode.OK);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ public void dropStats(DropStatsStmt dropStatsStmt) throws DdlException {
invalidateLocalStats(catalogId, dbId, tblId, cols, tableStats);
// Drop stats ddl is master only operation.
invalidateRemoteStats(catalogId, dbId, tblId, cols, dropStatsStmt.isAllColumns());
StatisticsRepository.dropStatistics(tblId, cols);
StatisticsRepository.dropStatisticsByColNames(catalogId, dbId, tblId, cols);
}

public void dropStats(TableIf table) throws DdlException {
Expand All @@ -633,7 +633,7 @@ public void dropStats(TableIf table) throws DdlException {
invalidateLocalStats(catalogId, dbId, tableId, cols, tableStats);
// Drop stats ddl is master only operation.
invalidateRemoteStats(catalogId, dbId, tableId, cols, true);
StatisticsRepository.dropStatistics(table.getId(), cols);
StatisticsRepository.dropStatisticsByColNames(catalogId, dbId, table.getId(), cols);
}

public void invalidateLocalStats(long catalogId, long dbId, long tableId,
Expand Down Expand Up @@ -666,7 +666,7 @@ public void invalidateLocalStats(long catalogId, long dbId, long tableId,
}
}
tableStats.removeColumn(indexName, column);
statisticsCache.invalidate(tableId, indexId, column);
statisticsCache.invalidate(catalogId, dbId, tableId, indexId, column);
}
}
tableStats.updatedTime = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) {
private Optional<ColumnStatistic> loadFromStatsTable(StatisticsCacheKey key) {
List<ResultRow> columnResults = null;
try {
columnResults = StatisticsRepository.loadColStats(key.tableId, key.idxId, key.colName);
columnResults = StatisticsRepository.loadColStats(
key.catalogId, key.dbId, key.tableId, key.idxId, key.colName);
} catch (InternalQueryExecutionException e) {
LOG.info("Failed to load stats for table {} column {}. Reason:{}",
key.tableId, key.colName, e.getMessage());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ public void doExecute() throws Exception {

StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
StatisticsUtil.execUpdate(stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE));
Env.getCurrentEnv().getStatisticsCache().refreshHistogramSync(tbl.getId(), -1, col.getName());
Env.getCurrentEnv().getStatisticsCache().refreshHistogramSync(
tbl.getDatabase().getCatalog().getId(), tbl.getDatabase().getId(), tbl.getId(), -1, col.getName());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,16 +91,16 @@ public ColumnStatistic getColumnStatistics(long catalogId, long dbId, long tblId
return ColumnStatistic.UNKNOWN;
}

public Histogram getHistogram(long tblId, String colName) {
return getHistogram(tblId, -1, colName).orElse(null);
public Histogram getHistogram(long ctlId, long dbId, long tblId, String colName) {
return getHistogram(ctlId, dbId, tblId, -1, colName).orElse(null);
}

public Optional<Histogram> getHistogram(long tblId, long idxId, String colName) {
private Optional<Histogram> getHistogram(long ctlId, long dbId, long tblId, long idxId, String colName) {
ConnectContext ctx = ConnectContext.get();
if (ctx != null && ctx.getSessionVariable().internalSession) {
return Optional.empty();
}
StatisticsCacheKey k = new StatisticsCacheKey(tblId, idxId, colName);
StatisticsCacheKey k = new StatisticsCacheKey(ctlId, dbId, tblId, idxId, colName);
try {
CompletableFuture<Optional<Histogram>> f = histogramCache.get(k);
if (f.isDone()) {
Expand All @@ -112,24 +112,22 @@ public Optional<Histogram> getHistogram(long tblId, long idxId, String colName)
return Optional.empty();
}

public void invalidate(long tblId, long idxId, String colName) {
columnStatisticsCache.synchronous().invalidate(new StatisticsCacheKey(tblId, idxId, colName));
public void invalidate(long ctlId, long dbId, long tblId, long idxId, String colName) {
columnStatisticsCache.synchronous().invalidate(new StatisticsCacheKey(ctlId, dbId, tblId, idxId, colName));
}

public void updateColStatsCache(long tblId, long idxId, String colName, ColumnStatistic statistic) {
columnStatisticsCache.synchronous().put(new StatisticsCacheKey(tblId, idxId, colName), Optional.of(statistic));
public void updateColStatsCache(long ctlId, long dbId, long tblId, long idxId, String colName,
ColumnStatistic statistic) {
columnStatisticsCache.synchronous()
.put(new StatisticsCacheKey(ctlId, dbId, tblId, idxId, colName), Optional.of(statistic));
}

public void refreshColStatsSync(long tblId, long idxId, String colName) {
columnStatisticsCache.synchronous().refresh(new StatisticsCacheKey(-1, -1, tblId, idxId, colName));
public void refreshColStatsSync(long ctlId, long dbId, long tblId, long idxId, String colName) {
columnStatisticsCache.synchronous().refresh(new StatisticsCacheKey(ctlId, dbId, tblId, idxId, colName));
}

public void refreshColStatsSync(long catalogId, long dbId, long tblId, long idxId, String colName) {
columnStatisticsCache.synchronous().refresh(new StatisticsCacheKey(catalogId, dbId, tblId, idxId, colName));
}

public void refreshHistogramSync(long tblId, long idxId, String colName) {
histogramCache.synchronous().refresh(new StatisticsCacheKey(tblId, idxId, colName));
public void refreshHistogramSync(long ctlId, long dbId, long tblId, long idxId, String colName) {
histogramCache.synchronous().refresh(new StatisticsCacheKey(ctlId, dbId, tblId, idxId, colName));
}

public void preHeat() {
Expand Down Expand Up @@ -168,11 +166,9 @@ private void doPreHeat() {
for (ResultRow r : recentStatsUpdatedCols) {
try {
StatsId statsId = new StatsId(r);
long tblId = statsId.tblId;
long idxId = statsId.idxId;
String colId = statsId.colId;
final StatisticsCacheKey k =
new StatisticsCacheKey(tblId, idxId, colId);
new StatisticsCacheKey(statsId.catalogId, statsId.dbId, statsId.tblId, statsId.idxId,
statsId.colId);
ColumnStatistic c = ColumnStatistic.fromResultRow(r);
if (c.count > 0 && c.ndv == 0 && c.count != c.numNulls) {
c = ColumnStatistic.UNKNOWN;
Expand All @@ -189,10 +185,11 @@ private void doPreHeat() {
*/
public void syncColStats(ColStatsData data) {
StatsId statsId = data.statsId;
final StatisticsCacheKey k = new StatisticsCacheKey(statsId.tblId, statsId.idxId, statsId.colId);
final StatisticsCacheKey k = new StatisticsCacheKey(statsId.catalogId, statsId.dbId, statsId.tblId,
statsId.idxId, statsId.colId);
ColumnStatistic columnStatistic = data.toColumnStatistic();
if (columnStatistic == ColumnStatistic.UNKNOWN) {
invalidate(k.tableId, k.idxId, k.colName);
invalidate(k.catalogId, k.dbId, k.tableId, k.idxId, k.colName);
} else {
putCache(k, columnStatistic);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,6 @@ public class StatisticsCacheKey {

private static final String DELIMITER = "-";

public StatisticsCacheKey(long tableId, long idxId, String colName) {
this(-1, -1, tableId, idxId, colName);
}

public StatisticsCacheKey(long catalogId, long dbId, long tableId, long idxId, String colName) {
this.catalogId = catalogId;
this.dbId = dbId;
Expand All @@ -55,7 +51,7 @@ public StatisticsCacheKey(long catalogId, long dbId, long tableId, long idxId, S

@Override
public int hashCode() {
return Objects.hash(tableId, idxId, colName);
return Objects.hash(catalogId, dbId, tableId, idxId, colName);
}

@Override
Expand All @@ -67,13 +63,16 @@ public boolean equals(Object obj) {
return false;
}
StatisticsCacheKey k = (StatisticsCacheKey) obj;
return this.tableId == k.tableId && this.idxId == k.idxId && this.colName.equals(k.colName);
return this.catalogId == k.catalogId && this.dbId == k.dbId && this.tableId == k.tableId
&& this.idxId == k.idxId && this.colName.equals(k.colName);
}

@Override
public String toString() {
StringJoiner sj = new StringJoiner(DELIMITER);
sj.add("ColumnStats");
sj.add(String.valueOf(catalogId));
sj.add(String.valueOf(dbId));
sj.add(String.valueOf(tableId));
sj.add(String.valueOf(idxId));
sj.add(colName);
Expand Down
Loading

0 comments on commit 0888611

Please sign in to comment.