From 8bf56bf54c4181a8e6a63867ebfee7cae452778a Mon Sep 17 00:00:00 2001 From: Jibing Li Date: Wed, 9 Aug 2023 12:04:06 +0800 Subject: [PATCH] Fix external stats collection bugs. Support show cached table stats. Support alter column stats. --- docs/en/docs/lakehouse/external-statistics.md | 7 ++++++- docs/zh-CN/docs/lakehouse/external-statistics.md | 7 ++++++- fe/fe-core/src/main/cup/sql_parser.cup | 4 ++-- .../doris/analysis/AlterColumnStatsStmt.java | 11 +++-------- .../org/apache/doris/analysis/AnalyzeTblStmt.java | 7 +++++-- .../apache/doris/analysis/ShowTableStatsStmt.java | 8 +++++++- .../java/org/apache/doris/qe/ShowExecutor.java | 15 ++++++++++++++- .../apache/doris/statistics/AnalysisManager.java | 7 ++++--- .../apache/doris/statistics/HMSAnalysisTask.java | 2 ++ .../apache/doris/statistics/StatisticsCache.java | 8 ++++++++ .../statistics/TableStatisticsCacheLoader.java | 4 +++- 11 files changed, 60 insertions(+), 20 deletions(-) diff --git a/docs/en/docs/lakehouse/external-statistics.md b/docs/en/docs/lakehouse/external-statistics.md index 6c469961a8bd42f..33724fc2388354b 100644 --- a/docs/en/docs/lakehouse/external-statistics.md +++ b/docs/en/docs/lakehouse/external-statistics.md @@ -191,6 +191,11 @@ DROP ANALYZE JOB [JOB_ID] Show statistics includes show table statistics (number of rows) and column statistics. Please refer to View statistics in [Internal Table Statistics](../query-acceleration/statistics.md) #### Table statistics +``` +SHOW TABLE [cached] stats TABLE_NAME; +``` + +View row count of the given table. If the cached parameter is specified, the row count of the specified table that has been loaded into the cache is displayed. ``` mysql> SHOW TABLE STATS hive.tpch100.orders; @@ -203,7 +208,7 @@ mysql> SHOW TABLE STATS hive.tpch100.orders; #### Column statistics ``` -SHOW COLUMN [cached] stats hive.tpch100.orders; +SHOW COLUMN [cached] stats TABLE_NAME; ``` View the column statistics of a table. 
If the cached parameter is specified, the column information of the specified table that has been loaded into the cache is displayed. diff --git a/docs/zh-CN/docs/lakehouse/external-statistics.md b/docs/zh-CN/docs/lakehouse/external-statistics.md index 0b47ed53295145a..f4f331b2870bd28 100644 --- a/docs/zh-CN/docs/lakehouse/external-statistics.md +++ b/docs/zh-CN/docs/lakehouse/external-statistics.md @@ -191,6 +191,11 @@ DROP ANALYZE JOB [JOB_ID] 信息的查看包括表的统计信息(表的行数)查看和列统计信息查看,请参考[内表统计信息](../query-acceleration/statistics.md)查看统计信息部分。 #### 表统计信息 +``` +SHOW TABLE [cached] stats TABLE_NAME; +``` + +查看statistics表中指定table的行数,如果指定cached参数,则展示的是指定表已加载到缓存中的行数信息。 ``` mysql> SHOW TABLE STATS hive.tpch100.orders; @@ -203,7 +208,7 @@ mysql> SHOW TABLE STATS hive.tpch100.orders; #### 列统计信息 ``` -SHOW COLUMN [cached] stats hive.tpch100.orders; +SHOW COLUMN [cached] stats TABLE_NAME; ``` 查看statistics表中指定table的列统计信息,如果指定cached参数,则展示的是指定表已加载到缓存中的列信息。 diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 7b41cc31900dd8b..9376a8f3d5120bc 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -4176,9 +4176,9 @@ show_param ::= RESULT = new ShowSyncJobStmt(dbName); :} /* show table stats */ - | KW_TABLE KW_STATS table_name:tbl opt_partition_names:partitionNames + | KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames {: - RESULT = new ShowTableStatsStmt(tbl, partitionNames); + RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached); :} /* show column stats */ | KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java index 0e7892dcd109f1e..58b81212671fa3b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java @@ -22,7 +22,6 @@ import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.PartitionType; -import org.apache.doris.catalog.Table; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; @@ -148,17 +147,13 @@ private void checkPartitionAndColumn() throws AnalysisException { DatabaseIf db = catalog.getDbOrAnalysisException(tableName.getDb()); TableIf table = db.getTableOrAnalysisException(tableName.getTbl()); - if (table.getType() != Table.TableType.OLAP) { - throw new AnalysisException("Only OLAP table statistics are supported"); - } - - OlapTable olapTable = (OlapTable) table; - if (olapTable.getColumn(columnName) == null) { + if (table.getColumn(columnName) == null) { ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME, columnName, FeNameFormat.getColumnNameRegex()); } - if (optPartitionNames != null) { + if (optPartitionNames != null && table instanceof OlapTable) { + OlapTable olapTable = (OlapTable) table; if (olapTable.getPartitionInfo().getType().equals(PartitionType.UNPARTITIONED)) { throw new AnalysisException("Not a partitioned table: " + olapTable.getName()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java index 527f802748dcd2d..fb4c3bb39a5dacf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java @@ -142,8 +142,11 @@ public void check() throws AnalysisException { } checkAnalyzePriv(tableName.getDb(), tableName.getTbl()); if (columnNames == null) { - columnNames = table.getBaseSchema(false) - .stream().map(Column::getName).collect(Collectors.toList()); + // Filter unsupported type columns. 
+ columnNames = table.getBaseSchema(false).stream() + .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) + .map(Column::getName) + .collect(Collectors.toList()); } table.readLock(); try { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java index e462c8585ca9847..da10d5c492b1fee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java @@ -53,12 +53,14 @@ public class ShowTableStatsStmt extends ShowStmt { private final TableName tableName; private final PartitionNames partitionNames; + private final boolean cached; private TableIf table; - public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames) { + public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached) { this.tableName = tableName; this.partitionNames = partitionNames; + this.cached = cached; } public TableName getTableName() { @@ -133,4 +135,8 @@ public ShowResultSet constructResultSet(TableStatistic tableStatistic) { result.add(row); return new ShowResultSet(getMetaData(), result); } + + public boolean isCached() { + return cached; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index 7b0d6c4d8a76389..4bfc6c61b1993b8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -138,6 +138,7 @@ import org.apache.doris.catalog.TabletInvertedIndex; import org.apache.doris.catalog.TabletMeta; import org.apache.doris.catalog.View; +import org.apache.doris.catalog.external.ExternalTable; import org.apache.doris.catalog.external.HMSExternalTable; import org.apache.doris.clone.DynamicPartitionScheduler; import 
org.apache.doris.cluster.ClusterNamespace; @@ -240,6 +241,7 @@ import java.util.HashSet; import java.util.List; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.function.Predicate; @@ -2411,8 +2413,19 @@ private void handleShowTableStats() { ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt; TableIf tableIf = showTableStatsStmt.getTable(); long partitionId = showTableStatsStmt.getPartitionId(); + boolean showCache = showTableStatsStmt.isCached(); try { - if (partitionId > 0) { + if (tableIf instanceof ExternalTable && showCache) { + Optional tableStatistics = Env.getCurrentEnv().getStatisticsCache().getTableStatistics( + tableIf.getDatabase().getCatalog().getId(), + tableIf.getDatabase().getId(), + tableIf.getId()); + if (tableStatistics.isPresent()) { + resultSet = showTableStatsStmt.constructResultSet(tableStatistics.get()); + } else { + resultSet = showTableStatsStmt.constructResultSet(TableStatistic.UNKNOWN); + } + } else if (partitionId > 0) { TableStatistic partStats = StatisticsRepository.fetchTableLevelOfPartStats(partitionId); resultSet = showTableStatsStmt.constructResultSet(partStats); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index cb4d9eb034c1f8f..014c18e251ae62b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -276,10 +276,9 @@ public List buildAnalysisInfosForDB(DatabaseIf db, Analyz TableName tableName = new TableName(db.getCatalog().getName(), db.getFullName(), table.getName()); // columnNames null means to add all visitable columns. 
+ // Will get all the visible columns in analyzeTblStmt.check() AnalyzeTblStmt analyzeTblStmt = new AnalyzeTblStmt(analyzeProperties, tableName, - table.getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())).map( - Column::getName).collect( - Collectors.toList()), db.getId(), table); + null, db.getId(), table); try { analyzeTblStmt.check(); } catch (AnalysisException analysisException) { @@ -808,6 +807,8 @@ public void dropStats(DropStatsStmt dropStatsStmt) throws DdlException { } if (dropStatsStmt.dropTableRowCount()) { StatisticsRepository.dropExternalTableStatistics(tblId); + // Table cache key doesn't care about catalog id and db id, because the table id is globally unique. + Env.getCurrentEnv().getStatisticsCache().invalidateTableStats(-1, -1, tblId); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java index 119368d91d78ef1..8a2bb0852c4dcfe 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java @@ -17,6 +17,7 @@ package org.apache.doris.statistics; +import org.apache.doris.catalog.Env; import org.apache.doris.catalog.external.HMSExternalTable; import org.apache.doris.common.FeConstants; import org.apache.doris.common.util.TimeUtils; @@ -148,6 +149,7 @@ private void getTableStats() throws Exception { String rowCount = columnResult.get(0).getColumnValue("rowCount"); params.put("rowCount", rowCount); StatisticsRepository.persistTableStats(params); + Env.getCurrentEnv().getStatisticsCache().refreshTableStatsSync(catalog.getId(), db.getId(), tbl.getId()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java index 2a2ea06135f33b8..cfbaf364b5aea3c 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java @@ -177,6 +177,14 @@ public void refreshColStatsSync(long catalogId, long dbId, long tblId, long idxI columnStatisticsCache.synchronous().refresh(new StatisticsCacheKey(catalogId, dbId, tblId, idxId, colName)); } + public void invalidateTableStats(long catalogId, long dbId, long tblId) { + tableStatisticsCache.synchronous().invalidate(new StatisticsCacheKey(catalogId, dbId, tblId)); + } + + public void refreshTableStatsSync(long catalogId, long dbId, long tblId) { + tableStatisticsCache.synchronous().refresh(new StatisticsCacheKey(catalogId, dbId, tblId)); + } + public void refreshHistogramSync(long tblId, long idxId, String colName) { histogramCache.synchronous().refresh(new StatisticsCacheKey(tblId, idxId, colName)); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticsCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticsCacheLoader.java index 817e74540fb7296..953bc9a42742b8a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticsCacheLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticsCacheLoader.java @@ -36,7 +36,9 @@ public class TableStatisticsCacheLoader extends StatisticsCacheLoader doLoad(StatisticsCacheKey key) { try { TableStatistic tableStatistic = StatisticsRepository.fetchTableLevelStats(key.tableId); - return Optional.of(tableStatistic); + if (tableStatistic != TableStatistic.UNKNOWN) { + return Optional.of(tableStatistic); + } } catch (DdlException e) { LOG.debug("Fail to get table line number from table_statistics table. " + "Will try to get from data source.", e);