Skip to content

Commit

Permalink
[improvement](external statistics)Fix external stats collection bugs (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
Jibing-Li authored Aug 17, 2023
1 parent c4dcba5 commit 383e990
Show file tree
Hide file tree
Showing 10 changed files with 74 additions and 20 deletions.
4 changes: 2 additions & 2 deletions fe/fe-core/src/main/cup/sql_parser.cup
Original file line number Diff line number Diff line change
Expand Up @@ -4083,9 +4083,9 @@ show_param ::=
RESULT = new ShowSyncJobStmt(dbName);
:}
/* show table stats */
| KW_TABLE KW_STATS table_name:tbl opt_partition_names:partitionNames
| KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames
{:
RESULT = new ShowTableStatsStmt(tbl, partitionNames);
RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached);
:}
/* show column stats */
| KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.PartitionType;
import org.apache.doris.catalog.Table;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
Expand Down Expand Up @@ -148,17 +147,13 @@ private void checkPartitionAndColumn() throws AnalysisException {
DatabaseIf db = catalog.getDbOrAnalysisException(tableName.getDb());
TableIf table = db.getTableOrAnalysisException(tableName.getTbl());

if (table.getType() != Table.TableType.OLAP) {
throw new AnalysisException("Only OLAP table statistics are supported");
}

OlapTable olapTable = (OlapTable) table;
if (olapTable.getColumn(columnName) == null) {
if (table.getColumn(columnName) == null) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME,
columnName, FeNameFormat.getColumnNameRegex());
}

if (optPartitionNames != null) {
if (optPartitionNames != null && table instanceof OlapTable) {
OlapTable olapTable = (OlapTable) table;
if (olapTable.getPartitionInfo().getType().equals(PartitionType.UNPARTITIONED)) {
throw new AnalysisException("Not a partitioned table: " + olapTable.getName());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,11 @@ public void check() throws AnalysisException {
}
checkAnalyzePriv(tableName.getDb(), tableName.getTbl());
if (columnNames == null) {
columnNames = table.getBaseSchema(false)
.stream().map(Column::getName).collect(Collectors.toList());
// Filter unsupported type columns.
columnNames = table.getBaseSchema(false).stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName)
.collect(Collectors.toList());
}
table.readLock();
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,14 @@ public class ShowTableStatsStmt extends ShowStmt {
private final TableName tableName;

private final PartitionNames partitionNames;
private final boolean cached;

private TableIf table;

public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames) {
public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached) {
this.tableName = tableName;
this.partitionNames = partitionNames;
this.cached = cached;
}

public TableName getTableName() {
Expand Down Expand Up @@ -133,4 +135,8 @@ public ShowResultSet constructResultSet(TableStatistic tableStatistic) {
result.add(row);
return new ShowResultSet(getMetaData(), result);
}

public boolean isCached() {
return cached;
}
}
15 changes: 14 additions & 1 deletion fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@
import org.apache.doris.catalog.TabletInvertedIndex;
import org.apache.doris.catalog.TabletMeta;
import org.apache.doris.catalog.View;
import org.apache.doris.catalog.external.ExternalTable;
import org.apache.doris.catalog.external.HMSExternalTable;
import org.apache.doris.clone.DynamicPartitionScheduler;
import org.apache.doris.cluster.ClusterNamespace;
Expand Down Expand Up @@ -234,6 +235,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
Expand Down Expand Up @@ -2345,8 +2347,19 @@ private void handleShowTableStats() {
ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt;
TableIf tableIf = showTableStatsStmt.getTable();
long partitionId = showTableStatsStmt.getPartitionId();
boolean showCache = showTableStatsStmt.isCached();
try {
if (partitionId > 0) {
if (tableIf instanceof ExternalTable && showCache) {
Optional<TableStatistic> tableStatistics = Env.getCurrentEnv().getStatisticsCache().getTableStatistics(
tableIf.getDatabase().getCatalog().getId(),
tableIf.getDatabase().getId(),
tableIf.getId());
if (tableStatistics.isPresent()) {
resultSet = showTableStatsStmt.constructResultSet(tableStatistics.get());
} else {
resultSet = showTableStatsStmt.constructResultSet(TableStatistic.UNKNOWN);
}
} else if (partitionId > 0) {
TableStatistic partStats = StatisticsRepository.fetchTableLevelOfPartStats(partitionId);
resultSet = showTableStatsStmt.constructResultSet(partStats);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ public void createAnalysisJobs(AnalyzeDBStmt analyzeDBStmt, boolean proxy) throw
}
TableName tableName = new TableName(analyzeDBStmt.getCtlIf().getName(), db.getFullName(),
table.getName());
// columnNames null means to add all visitable columns.
// Will get all the visible columns in analyzeTblStmt.check()
AnalyzeTblStmt analyzeTblStmt = new AnalyzeTblStmt(analyzeDBStmt.getAnalyzeProperties(), tableName,
null, db.getId(), table);
try {
Expand Down Expand Up @@ -772,6 +774,8 @@ public void dropStats(DropStatsStmt dropStatsStmt) throws DdlException {
}
if (dropStatsStmt.dropTableRowCount()) {
StatisticsRepository.dropExternalTableStatistics(tblId);
// Table cache key doesn't care about catalog id and db id, because the table id is globally unique.
Env.getCurrentEnv().getStatisticsCache().invalidateTableStats(-1, -1, tblId);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.doris.statistics;

import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.external.HMSExternalTable;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.util.TimeUtils;
Expand Down Expand Up @@ -291,4 +292,13 @@ private void setParameterData(Map<String, String> parameters, Map<String, String
LocalDateTime.ofInstant(Instant.ofEpochMilli(Long.parseLong(timestamp) * 1000),
ZoneId.systemDefault())));
}

@Override
protected void afterExecution() {
if (isTableLevelTask) {
Env.getCurrentEnv().getStatisticsCache().refreshTableStatsSync(catalog.getId(), db.getId(), tbl.getId());
} else {
Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tbl.getId(), -1, col.getName());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,14 @@ public void refreshColStatsSync(long catalogId, long dbId, long tblId, long idxI
columnStatisticsCache.synchronous().refresh(new StatisticsCacheKey(catalogId, dbId, tblId, idxId, colName));
}

public void invalidateTableStats(long catalogId, long dbId, long tblId) {
tableStatisticsCache.synchronous().invalidate(new StatisticsCacheKey(catalogId, dbId, tblId));
}

public void refreshTableStatsSync(long catalogId, long dbId, long tblId) {
tableStatisticsCache.synchronous().refresh(new StatisticsCacheKey(catalogId, dbId, tblId));
}

public void refreshHistogramSync(long tblId, long idxId, String colName) {
histogramCache.synchronous().refresh(new StatisticsCacheKey(tblId, idxId, colName));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ public class TableStatisticsCacheLoader extends StatisticsCacheLoader<Optional<T
protected Optional<TableStatistic> doLoad(StatisticsCacheKey key) {
try {
TableStatistic tableStatistic = StatisticsRepository.fetchTableLevelStats(key.tableId);
return Optional.of(tableStatistic);
if (tableStatistic != TableStatistic.UNKNOWN) {
return Optional.of(tableStatistic);
}
} catch (DdlException e) {
LOG.debug("Fail to get table line number from table_statistics table. "
+ "Will try to get from data source.", e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,15 +221,28 @@ suite("test_hive_statistic", "p2") {
assertTrue(result[0][6] == "'AIR'")
assertTrue(result[0][7] == "'TRUCK'")

// sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS ('row_count'='6001215')"""
// result = sql "show column stats `statistics` (lo_shipmode)"
// assertTrue(result.size() == 1)
// assertTrue(result[0][0] == "lo_shipmode")
// assertTrue(result[0][1] == "6001215.0")
sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS ('row_count'='6001215')"""
result = sql "show column stats `statistics` (lo_shipmode)"
assertTrue(result.size() == 1)
assertTrue(result[0][0] == "lo_shipmode")
assertTrue(result[0][1] == "6001215.0")

sql """drop stats statistics"""
result = sql """show column stats statistics"""
assertTrue(result.size() == 0)

sql """analyze database `statistics` with sync"""
result = sql """show table stats statistics"""
assertTrue(result.size() == 1)
assertTrue(result[0][0] == "100")

result = sql """show table cached stats statistics"""
assertTrue(result.size() == 1)
assertTrue(result[0][0] == "100")

sql """drop stats statistics"""
result = sql """show column cached stats statistics"""
assertTrue(result.size() == 0)
}
}

0 comments on commit 383e990

Please sign in to comment.